From 6d187f0eff662a50057dfb379aeb5e9111239bf0 Mon Sep 17 00:00:00 2001
From: Alex Lorenz <arphaman@gmail.com>
Date: Fri, 12 Jul 2019 22:06:08 +0000
Subject: [PATCH 001/451] [macCatalyst] Use macCatalyst pretty name in
 .build_version darwin assembly command

'macCatalyst' is more readable than 'maccatalyst'. I renamed the objdump output,
but the assembly should match it as well.

llvm-svn: 365964
---
 llvm/lib/MC/MCAsmStreamer.cpp                  | 2 +-
 llvm/lib/MC/MCParser/DarwinAsmParser.cpp       | 2 +-
 llvm/test/CodeGen/X86/macCatalyst.ll           | 2 +-
 llvm/test/MC/MachO/build-version-maccatalyst.s | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index a78092ec45824..7e8f02e3a1aa5 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -541,7 +541,7 @@ static const char *getPlatformName(MachO::PlatformType Type) {
   case MachO::PLATFORM_TVOS:             return "tvos";
   case MachO::PLATFORM_WATCHOS:          return "watchos";
   case MachO::PLATFORM_BRIDGEOS:         return "bridgeos";
-  case MachO::PLATFORM_MACCATALYST:      return "maccatalyst";
+  case MachO::PLATFORM_MACCATALYST:      return "macCatalyst";
   case MachO::PLATFORM_IOSSIMULATOR:     return "iossimulator";
   case MachO::PLATFORM_TVOSSIMULATOR:    return "tvossimulator";
   case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator";
diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 1b5b01267343a..1160934dc62c4 100644
--- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -1169,7 +1169,7 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
     .Case("ios", MachO::PLATFORM_IOS)
     .Case("tvos", MachO::PLATFORM_TVOS)
     .Case("watchos", MachO::PLATFORM_WATCHOS)
-    .Case("maccatalyst", MachO::PLATFORM_MACCATALYST)
+    .Case("macCatalyst", MachO::PLATFORM_MACCATALYST)
     .Default(0);
   if (Platform == 0)
     return Error(PlatformLoc, "unknown platform name");
diff --git a/llvm/test/CodeGen/X86/macCatalyst.ll b/llvm/test/CodeGen/X86/macCatalyst.ll
index 7a126f1665d21..fbf3af54771b0 100644
--- a/llvm/test/CodeGen/X86/macCatalyst.ll
+++ b/llvm/test/CodeGen/X86/macCatalyst.ll
@@ -1,3 +1,3 @@
 ; RUN: llc %s -o - | FileCheck %s
 target triple="x86_64-apple-ios13.0-macabi"
-; CHECK: .build_version maccatalyst, 13, 0
+; CHECK: .build_version macCatalyst, 13, 0
diff --git a/llvm/test/MC/MachO/build-version-maccatalyst.s b/llvm/test/MC/MachO/build-version-maccatalyst.s
index 9056780810d10..aff5589264e37 100644
--- a/llvm/test/MC/MachO/build-version-maccatalyst.s
+++ b/llvm/test/MC/MachO/build-version-maccatalyst.s
@@ -1,4 +1,4 @@
 // RUN: llvm-mc -triple x86_64-apple-ios %s | FileCheck %s
 
-.build_version maccatalyst,13,0
-// CHECK: .build_version maccatalyst, 13, 0
+.build_version macCatalyst,13,0
+// CHECK: .build_version macCatalyst, 13, 0

From d8ddf839505a1aeb8a7b1b3cdeea8a5cad3b1db0 Mon Sep 17 00:00:00 2001
From: Wouter van Oortmerssen <aardappel@gmail.com>
Date: Fri, 12 Jul 2019 22:08:25 +0000
Subject: [PATCH 002/451] [WebAssembly] refactored utilities to not depend on
 MachineInstr

Summary:
Most of these functions now take an opcode rather than a MachineInstr,
so they work equally well for MachineInstr and MCInst.

Reviewers: dschuff

Subscribers: MatzeB, sbc100, jgravelle-google, aheejin, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64643

llvm-svn: 365965
---
 .../MCTargetDesc/WebAssemblyMCTargetDesc.h    | 255 ++++++++++++++++--
 .../WebAssembly/WebAssemblyArgumentMove.cpp   |   4 +-
 .../WebAssembly/WebAssemblyCFGStackify.cpp    |   2 +-
 .../WebAssembly/WebAssemblyExplicitLocals.cpp |   8 +-
 .../WebAssembly/WebAssemblyFrameLowering.cpp  |   3 +-
 .../WebAssembly/WebAssemblyMCInstLower.cpp    |   4 +-
 .../WebAssemblyPrepareForLiveIntervals.cpp    |   4 +-
 .../WebAssembly/WebAssemblyRegNumbering.cpp   |   2 +-
 .../WebAssembly/WebAssemblyRegStackify.cpp    |   4 +-
 .../WebAssembly/WebAssemblyUtilities.cpp      | 219 +--------------
 .../Target/WebAssembly/WebAssemblyUtilities.h |  10 -
 11 files changed, 254 insertions(+), 261 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 67532013afd8a..a0d526b8a2e03 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -122,9 +122,27 @@ enum TOF {
 namespace llvm {
 namespace WebAssembly {
 
+/// This is used to indicate block signatures.
+enum class ExprType : unsigned {
+  Void = 0x40,
+  I32 = 0x7F,
+  I64 = 0x7E,
+  F32 = 0x7D,
+  F64 = 0x7C,
+  V128 = 0x7B,
+  ExceptRef = 0x68,
+  Invalid = 0x00
+};
+
+/// Instruction opcodes emitted via means other than CodeGen.
+static const unsigned Nop = 0x01;
+static const unsigned End = 0x0b;
+
+wasm::ValType toValType(const MVT &Ty);
+
 /// Return the default p2align value for a load or store with the given opcode.
-inline unsigned GetDefaultP2AlignAny(unsigned Opcode) {
-  switch (Opcode) {
+inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
+  switch (Opc) {
   case WebAssembly::LOAD8_S_I32:
   case WebAssembly::LOAD8_S_I32_S:
   case WebAssembly::LOAD8_U_I32:
@@ -337,31 +355,230 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opcode) {
   }
 }
 
-inline unsigned GetDefaultP2Align(unsigned Opcode) {
-  auto Align = GetDefaultP2AlignAny(Opcode);
+inline unsigned GetDefaultP2Align(unsigned Opc) {
+  auto Align = GetDefaultP2AlignAny(Opc);
   if (Align == -1U) {
     llvm_unreachable("Only loads and stores have p2align values");
   }
   return Align;
 }
 
-/// This is used to indicate block signatures.
-enum class ExprType : unsigned {
-  Void = 0x40,
-  I32 = 0x7F,
-  I64 = 0x7E,
-  F32 = 0x7D,
-  F64 = 0x7C,
-  V128 = 0x7B,
-  ExceptRef = 0x68,
-  Invalid = 0x00
-};
+inline bool isArgument(unsigned Opc) {
+  switch (Opc) {
+  case WebAssembly::ARGUMENT_i32:
+  case WebAssembly::ARGUMENT_i32_S:
+  case WebAssembly::ARGUMENT_i64:
+  case WebAssembly::ARGUMENT_i64_S:
+  case WebAssembly::ARGUMENT_f32:
+  case WebAssembly::ARGUMENT_f32_S:
+  case WebAssembly::ARGUMENT_f64:
+  case WebAssembly::ARGUMENT_f64_S:
+  case WebAssembly::ARGUMENT_v16i8:
+  case WebAssembly::ARGUMENT_v16i8_S:
+  case WebAssembly::ARGUMENT_v8i16:
+  case WebAssembly::ARGUMENT_v8i16_S:
+  case WebAssembly::ARGUMENT_v4i32:
+  case WebAssembly::ARGUMENT_v4i32_S:
+  case WebAssembly::ARGUMENT_v2i64:
+  case WebAssembly::ARGUMENT_v2i64_S:
+  case WebAssembly::ARGUMENT_v4f32:
+  case WebAssembly::ARGUMENT_v4f32_S:
+  case WebAssembly::ARGUMENT_v2f64:
+  case WebAssembly::ARGUMENT_v2f64_S:
+    return true;
+  default:
+    return false;
+  }
+}
 
-/// Instruction opcodes emitted via means other than CodeGen.
-static const unsigned Nop = 0x01;
-static const unsigned End = 0x0b;
+inline bool isCopy(unsigned Opc) {
+  switch (Opc) {
+  case WebAssembly::COPY_I32:
+  case WebAssembly::COPY_I32_S:
+  case WebAssembly::COPY_I64:
+  case WebAssembly::COPY_I64_S:
+  case WebAssembly::COPY_F32:
+  case WebAssembly::COPY_F32_S:
+  case WebAssembly::COPY_F64:
+  case WebAssembly::COPY_F64_S:
+  case WebAssembly::COPY_V128:
+  case WebAssembly::COPY_V128_S:
+  case WebAssembly::COPY_EXCEPT_REF:
+  case WebAssembly::COPY_EXCEPT_REF_S:
+    return true;
+  default:
+    return false;
+  }
+}
 
-wasm::ValType toValType(const MVT &Ty);
+inline bool isTee(unsigned Opc) {
+  switch (Opc) {
+  case WebAssembly::TEE_I32:
+  case WebAssembly::TEE_I32_S:
+  case WebAssembly::TEE_I64:
+  case WebAssembly::TEE_I64_S:
+  case WebAssembly::TEE_F32:
+  case WebAssembly::TEE_F32_S:
+  case WebAssembly::TEE_F64:
+  case WebAssembly::TEE_F64_S:
+  case WebAssembly::TEE_V128:
+  case WebAssembly::TEE_V128_S:
+    return true;
+  default:
+    return false;
+  }
+}
+
+inline bool isCallDirect(unsigned Opc) {
+  switch (Opc) {
+  case WebAssembly::CALL_VOID:
+  case WebAssembly::CALL_VOID_S:
+  case WebAssembly::CALL_i32:
+  case WebAssembly::CALL_i32_S:
+  case WebAssembly::CALL_i64:
+  case WebAssembly::CALL_i64_S:
+  case WebAssembly::CALL_f32:
+  case WebAssembly::CALL_f32_S:
+  case WebAssembly::CALL_f64:
+  case WebAssembly::CALL_f64_S:
+  case WebAssembly::CALL_v16i8:
+  case WebAssembly::CALL_v16i8_S:
+  case WebAssembly::CALL_v8i16:
+  case WebAssembly::CALL_v8i16_S:
+  case WebAssembly::CALL_v4i32:
+  case WebAssembly::CALL_v4i32_S:
+  case WebAssembly::CALL_v2i64:
+  case WebAssembly::CALL_v2i64_S:
+  case WebAssembly::CALL_v4f32:
+  case WebAssembly::CALL_v4f32_S:
+  case WebAssembly::CALL_v2f64:
+  case WebAssembly::CALL_v2f64_S:
+  case WebAssembly::CALL_ExceptRef:
+  case WebAssembly::CALL_ExceptRef_S:
+  case WebAssembly::RET_CALL:
+  case WebAssembly::RET_CALL_S:
+    return true;
+  default:
+    return false;
+  }
+}
+
+inline bool isCallIndirect(unsigned Opc) {
+  switch (Opc) {
+  case WebAssembly::CALL_INDIRECT_VOID:
+  case WebAssembly::CALL_INDIRECT_VOID_S:
+  case WebAssembly::CALL_INDIRECT_i32:
+  case WebAssembly::CALL_INDIRECT_i32_S:
+  case WebAssembly::CALL_INDIRECT_i64:
+  case WebAssembly::CALL_INDIRECT_i64_S:
+  case WebAssembly::CALL_INDIRECT_f32:
+  case WebAssembly::CALL_INDIRECT_f32_S:
+  case WebAssembly::CALL_INDIRECT_f64:
+  case WebAssembly::CALL_INDIRECT_f64_S:
+  case WebAssembly::CALL_INDIRECT_v16i8:
+  case WebAssembly::CALL_INDIRECT_v16i8_S:
+  case WebAssembly::CALL_INDIRECT_v8i16:
+  case WebAssembly::CALL_INDIRECT_v8i16_S:
+  case WebAssembly::CALL_INDIRECT_v4i32:
+  case WebAssembly::CALL_INDIRECT_v4i32_S:
+  case WebAssembly::CALL_INDIRECT_v2i64:
+  case WebAssembly::CALL_INDIRECT_v2i64_S:
+  case WebAssembly::CALL_INDIRECT_v4f32:
+  case WebAssembly::CALL_INDIRECT_v4f32_S:
+  case WebAssembly::CALL_INDIRECT_v2f64:
+  case WebAssembly::CALL_INDIRECT_v2f64_S:
+  case WebAssembly::CALL_INDIRECT_ExceptRef:
+  case WebAssembly::CALL_INDIRECT_ExceptRef_S:
+  case WebAssembly::RET_CALL_INDIRECT:
+  case WebAssembly::RET_CALL_INDIRECT_S:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Returns the operand number of a callee, assuming the argument is a call
+/// instruction.
+inline unsigned getCalleeOpNo(unsigned Opc) {
+  switch (Opc) {
+  case WebAssembly::CALL_VOID:
+  case WebAssembly::CALL_VOID_S:
+  case WebAssembly::CALL_INDIRECT_VOID:
+  case WebAssembly::CALL_INDIRECT_VOID_S:
+  case WebAssembly::RET_CALL:
+  case WebAssembly::RET_CALL_S:
+  case WebAssembly::RET_CALL_INDIRECT:
+  case WebAssembly::RET_CALL_INDIRECT_S:
+    return 0;
+  case WebAssembly::CALL_i32:
+  case WebAssembly::CALL_i32_S:
+  case WebAssembly::CALL_i64:
+  case WebAssembly::CALL_i64_S:
+  case WebAssembly::CALL_f32:
+  case WebAssembly::CALL_f32_S:
+  case WebAssembly::CALL_f64:
+  case WebAssembly::CALL_f64_S:
+  case WebAssembly::CALL_v16i8:
+  case WebAssembly::CALL_v16i8_S:
+  case WebAssembly::CALL_v8i16:
+  case WebAssembly::CALL_v8i16_S:
+  case WebAssembly::CALL_v4i32:
+  case WebAssembly::CALL_v4i32_S:
+  case WebAssembly::CALL_v2i64:
+  case WebAssembly::CALL_v2i64_S:
+  case WebAssembly::CALL_v4f32:
+  case WebAssembly::CALL_v4f32_S:
+  case WebAssembly::CALL_v2f64:
+  case WebAssembly::CALL_v2f64_S:
+  case WebAssembly::CALL_ExceptRef:
+  case WebAssembly::CALL_ExceptRef_S:
+  case WebAssembly::CALL_INDIRECT_i32:
+  case WebAssembly::CALL_INDIRECT_i32_S:
+  case WebAssembly::CALL_INDIRECT_i64:
+  case WebAssembly::CALL_INDIRECT_i64_S:
+  case WebAssembly::CALL_INDIRECT_f32:
+  case WebAssembly::CALL_INDIRECT_f32_S:
+  case WebAssembly::CALL_INDIRECT_f64:
+  case WebAssembly::CALL_INDIRECT_f64_S:
+  case WebAssembly::CALL_INDIRECT_v16i8:
+  case WebAssembly::CALL_INDIRECT_v16i8_S:
+  case WebAssembly::CALL_INDIRECT_v8i16:
+  case WebAssembly::CALL_INDIRECT_v8i16_S:
+  case WebAssembly::CALL_INDIRECT_v4i32:
+  case WebAssembly::CALL_INDIRECT_v4i32_S:
+  case WebAssembly::CALL_INDIRECT_v2i64:
+  case WebAssembly::CALL_INDIRECT_v2i64_S:
+  case WebAssembly::CALL_INDIRECT_v4f32:
+  case WebAssembly::CALL_INDIRECT_v4f32_S:
+  case WebAssembly::CALL_INDIRECT_v2f64:
+  case WebAssembly::CALL_INDIRECT_v2f64_S:
+  case WebAssembly::CALL_INDIRECT_ExceptRef:
+  case WebAssembly::CALL_INDIRECT_ExceptRef_S:
+    return 1;
+  default:
+    llvm_unreachable("Not a call instruction");
+  }
+}
+
+inline bool isMarker(unsigned Opc) {
+  switch (Opc) {
+  case WebAssembly::BLOCK:
+  case WebAssembly::BLOCK_S:
+  case WebAssembly::END_BLOCK:
+  case WebAssembly::END_BLOCK_S:
+  case WebAssembly::LOOP:
+  case WebAssembly::LOOP_S:
+  case WebAssembly::END_LOOP:
+  case WebAssembly::END_LOOP_S:
+  case WebAssembly::TRY:
+  case WebAssembly::TRY_S:
+  case WebAssembly::END_TRY:
+  case WebAssembly::END_TRY_S:
+    return true;
+  default:
+    return false;
+  }
+}
 
 } // end namespace WebAssembly
 } // end namespace llvm
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
index 3dcf34aeb280f..02f5cc6da77ca 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
@@ -78,7 +78,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
 
   // Look for the first NonArg instruction.
   for (MachineInstr &MI : EntryMBB) {
-    if (!WebAssembly::isArgument(MI)) {
+    if (!WebAssembly::isArgument(MI.getOpcode())) {
       InsertPt = MI;
       break;
     }
@@ -87,7 +87,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
   // Now move any argument instructions later in the block
   // to before our first NonArg instruction.
   for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) {
-    if (WebAssembly::isArgument(MI)) {
+    if (WebAssembly::isArgument(MI.getOpcode())) {
       EntryMBB.insert(InsertPt, MI.removeFromParent());
       Changed = true;
     }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index a429bee466400..a23a47d2e89ad 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -894,7 +894,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
 
       // We wrap up the current range when we see a marker even if we haven't
       // finished a BB.
-      if (RangeEnd && WebAssembly::isMarker(MI)) {
+      if (RangeEnd && WebAssembly::isMarker(MI.getOpcode())) {
         NeedAppendixBlock = true;
         // Record the range. nullptr here means the unwind destination is the
         // caller.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 5343697527445..de7e912129fb6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -205,7 +205,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
                                    E = MF.begin()->end();
        I != E;) {
     MachineInstr &MI = *I++;
-    if (!WebAssembly::isArgument(MI))
+    if (!WebAssembly::isArgument(MI.getOpcode()))
       break;
     unsigned Reg = MI.getOperand(0).getReg();
     assert(!MFI.isVRegStackified(Reg));
@@ -227,7 +227,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   for (MachineBasicBlock &MBB : MF) {
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
       MachineInstr &MI = *I++;
-      assert(!WebAssembly::isArgument(MI));
+      assert(!WebAssembly::isArgument(MI.getOpcode()));
 
       if (MI.isDebugInstr() || MI.isLabel())
         continue;
@@ -235,7 +235,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
       // Replace tee instructions with local.tee. The difference is that tee
       // instructions have two defs, while local.tee instructions have one def
       // and an index of a local to write to.
-      if (WebAssembly::isTee(MI)) {
+      if (WebAssembly::isTee(MI.getOpcode())) {
         assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
         assert(!MFI.isVRegStackified(MI.getOperand(1).getReg()));
         unsigned OldReg = MI.getOperand(2).getReg();
@@ -356,7 +356,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
       }
 
       // Coalesce and eliminate COPY instructions.
-      if (WebAssembly::isCopy(MI)) {
+      if (WebAssembly::isCopy(MI.getOpcode())) {
         MRI.replaceRegWith(MI.getOperand(1).getReg(),
                            MI.getOperand(0).getReg());
         MI.eraseFromParent();
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index a1c567fa3c073..5299068efdd44 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -164,7 +164,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
   auto &MRI = MF.getRegInfo();
 
   auto InsertPt = MBB.begin();
-  while (InsertPt != MBB.end() && WebAssembly::isArgument(*InsertPt))
+  while (InsertPt != MBB.end() &&
+         WebAssembly::isArgument(InsertPt->getOpcode()))
     ++InsertPt;
   DebugLoc DL;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 27f13d9639a06..611f05f949691 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -16,7 +16,7 @@
 #include "WebAssemblyAsmPrinter.h"
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblyRuntimeLibcallSignatures.h"
-#include "WebAssemblyUtilities.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/Constants.h"
@@ -221,7 +221,7 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
 
           // call_indirect instructions have a callee operand at the end which
           // doesn't count as a param.
-          if (WebAssembly::isCallIndirect(*MI))
+          if (WebAssembly::isCallIndirect(MI->getOpcode()))
             Params.pop_back();
 
           auto *WasmSym = cast<MCSymbolWasm>(Sym);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index 12b70f7ce4f13..3bfbf607344db 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -64,7 +64,7 @@ FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() {
 // Test whether the given register has an ARGUMENT def.
 static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
   for (const auto &Def : MRI.def_instructions(Reg))
-    if (WebAssembly::isArgument(Def))
+    if (WebAssembly::isArgument(Def.getOpcode()))
       return true;
   return false;
 }
@@ -114,7 +114,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
   // liveness reflects the fact that these really are live-in values.
   for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) {
     MachineInstr &MI = *MII++;
-    if (WebAssembly::isArgument(MI)) {
+    if (WebAssembly::isArgument(MI.getOpcode())) {
       MI.removeFromParent();
       Entry.insert(Entry.begin(), &MI);
     }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index 424b2ca2f841c..cdca23f55b29f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -72,7 +72,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
   // variables. Assign the numbers for them first.
   MachineBasicBlock &EntryMBB = MF.front();
   for (MachineInstr &MI : EntryMBB) {
-    if (!WebAssembly::isArgument(MI))
+    if (!WebAssembly::isArgument(MI.getOpcode()))
       break;
 
     int64_t Imm = MI.getOperand(1).getImm();
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 6463e268c9059..31ba6f0e4c237 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -252,7 +252,7 @@ static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
 
   // Analyze calls.
   if (MI.isCall()) {
-    unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI);
+    unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode());
     queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer);
   }
 }
@@ -826,7 +826,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
 
         // Argument instructions represent live-in registers and not real
         // instructions.
-        if (WebAssembly::isArgument(*Def))
+        if (WebAssembly::isArgument(Def->getOpcode()))
           continue;
 
         // Currently catch's return value register cannot be stackified, because
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index cca27d9c8d4e4..e9d88d4818a58 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -24,72 +24,6 @@ const char *const WebAssembly::StdTerminateFn = "_ZSt9terminatev";
 const char *const WebAssembly::PersonalityWrapperFn =
     "_Unwind_Wasm_CallPersonality";
 
-bool WebAssembly::isArgument(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::ARGUMENT_i32:
-  case WebAssembly::ARGUMENT_i32_S:
-  case WebAssembly::ARGUMENT_i64:
-  case WebAssembly::ARGUMENT_i64_S:
-  case WebAssembly::ARGUMENT_f32:
-  case WebAssembly::ARGUMENT_f32_S:
-  case WebAssembly::ARGUMENT_f64:
-  case WebAssembly::ARGUMENT_f64_S:
-  case WebAssembly::ARGUMENT_v16i8:
-  case WebAssembly::ARGUMENT_v16i8_S:
-  case WebAssembly::ARGUMENT_v8i16:
-  case WebAssembly::ARGUMENT_v8i16_S:
-  case WebAssembly::ARGUMENT_v4i32:
-  case WebAssembly::ARGUMENT_v4i32_S:
-  case WebAssembly::ARGUMENT_v2i64:
-  case WebAssembly::ARGUMENT_v2i64_S:
-  case WebAssembly::ARGUMENT_v4f32:
-  case WebAssembly::ARGUMENT_v4f32_S:
-  case WebAssembly::ARGUMENT_v2f64:
-  case WebAssembly::ARGUMENT_v2f64_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isCopy(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::COPY_I32:
-  case WebAssembly::COPY_I32_S:
-  case WebAssembly::COPY_I64:
-  case WebAssembly::COPY_I64_S:
-  case WebAssembly::COPY_F32:
-  case WebAssembly::COPY_F32_S:
-  case WebAssembly::COPY_F64:
-  case WebAssembly::COPY_F64_S:
-  case WebAssembly::COPY_V128:
-  case WebAssembly::COPY_V128_S:
-  case WebAssembly::COPY_EXCEPT_REF:
-  case WebAssembly::COPY_EXCEPT_REF_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isTee(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::TEE_I32:
-  case WebAssembly::TEE_I32_S:
-  case WebAssembly::TEE_I64:
-  case WebAssembly::TEE_I64_S:
-  case WebAssembly::TEE_F32:
-  case WebAssembly::TEE_F32_S:
-  case WebAssembly::TEE_F64:
-  case WebAssembly::TEE_F64_S:
-  case WebAssembly::TEE_V128:
-  case WebAssembly::TEE_V128_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
 /// Test whether MI is a child of some other node in an expression tree.
 bool WebAssembly::isChild(const MachineInstr &MI,
                           const WebAssemblyFunctionInfo &MFI) {
@@ -103,155 +37,6 @@ bool WebAssembly::isChild(const MachineInstr &MI,
          MFI.isVRegStackified(Reg);
 }
 
-bool WebAssembly::isCallDirect(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::CALL_VOID:
-  case WebAssembly::CALL_VOID_S:
-  case WebAssembly::CALL_i32:
-  case WebAssembly::CALL_i32_S:
-  case WebAssembly::CALL_i64:
-  case WebAssembly::CALL_i64_S:
-  case WebAssembly::CALL_f32:
-  case WebAssembly::CALL_f32_S:
-  case WebAssembly::CALL_f64:
-  case WebAssembly::CALL_f64_S:
-  case WebAssembly::CALL_v16i8:
-  case WebAssembly::CALL_v16i8_S:
-  case WebAssembly::CALL_v8i16:
-  case WebAssembly::CALL_v8i16_S:
-  case WebAssembly::CALL_v4i32:
-  case WebAssembly::CALL_v4i32_S:
-  case WebAssembly::CALL_v2i64:
-  case WebAssembly::CALL_v2i64_S:
-  case WebAssembly::CALL_v4f32:
-  case WebAssembly::CALL_v4f32_S:
-  case WebAssembly::CALL_v2f64:
-  case WebAssembly::CALL_v2f64_S:
-  case WebAssembly::CALL_ExceptRef:
-  case WebAssembly::CALL_ExceptRef_S:
-  case WebAssembly::RET_CALL:
-  case WebAssembly::RET_CALL_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::CALL_INDIRECT_VOID:
-  case WebAssembly::CALL_INDIRECT_VOID_S:
-  case WebAssembly::CALL_INDIRECT_i32:
-  case WebAssembly::CALL_INDIRECT_i32_S:
-  case WebAssembly::CALL_INDIRECT_i64:
-  case WebAssembly::CALL_INDIRECT_i64_S:
-  case WebAssembly::CALL_INDIRECT_f32:
-  case WebAssembly::CALL_INDIRECT_f32_S:
-  case WebAssembly::CALL_INDIRECT_f64:
-  case WebAssembly::CALL_INDIRECT_f64_S:
-  case WebAssembly::CALL_INDIRECT_v16i8:
-  case WebAssembly::CALL_INDIRECT_v16i8_S:
-  case WebAssembly::CALL_INDIRECT_v8i16:
-  case WebAssembly::CALL_INDIRECT_v8i16_S:
-  case WebAssembly::CALL_INDIRECT_v4i32:
-  case WebAssembly::CALL_INDIRECT_v4i32_S:
-  case WebAssembly::CALL_INDIRECT_v2i64:
-  case WebAssembly::CALL_INDIRECT_v2i64_S:
-  case WebAssembly::CALL_INDIRECT_v4f32:
-  case WebAssembly::CALL_INDIRECT_v4f32_S:
-  case WebAssembly::CALL_INDIRECT_v2f64:
-  case WebAssembly::CALL_INDIRECT_v2f64_S:
-  case WebAssembly::CALL_INDIRECT_ExceptRef:
-  case WebAssembly::CALL_INDIRECT_ExceptRef_S:
-  case WebAssembly::RET_CALL_INDIRECT:
-  case WebAssembly::RET_CALL_INDIRECT_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
-unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::CALL_VOID:
-  case WebAssembly::CALL_VOID_S:
-  case WebAssembly::CALL_INDIRECT_VOID:
-  case WebAssembly::CALL_INDIRECT_VOID_S:
-  case WebAssembly::RET_CALL:
-  case WebAssembly::RET_CALL_S:
-  case WebAssembly::RET_CALL_INDIRECT:
-  case WebAssembly::RET_CALL_INDIRECT_S:
-    return 0;
-  case WebAssembly::CALL_i32:
-  case WebAssembly::CALL_i32_S:
-  case WebAssembly::CALL_i64:
-  case WebAssembly::CALL_i64_S:
-  case WebAssembly::CALL_f32:
-  case WebAssembly::CALL_f32_S:
-  case WebAssembly::CALL_f64:
-  case WebAssembly::CALL_f64_S:
-  case WebAssembly::CALL_v16i8:
-  case WebAssembly::CALL_v16i8_S:
-  case WebAssembly::CALL_v8i16:
-  case WebAssembly::CALL_v8i16_S:
-  case WebAssembly::CALL_v4i32:
-  case WebAssembly::CALL_v4i32_S:
-  case WebAssembly::CALL_v2i64:
-  case WebAssembly::CALL_v2i64_S:
-  case WebAssembly::CALL_v4f32:
-  case WebAssembly::CALL_v4f32_S:
-  case WebAssembly::CALL_v2f64:
-  case WebAssembly::CALL_v2f64_S:
-  case WebAssembly::CALL_ExceptRef:
-  case WebAssembly::CALL_ExceptRef_S:
-  case WebAssembly::CALL_INDIRECT_i32:
-  case WebAssembly::CALL_INDIRECT_i32_S:
-  case WebAssembly::CALL_INDIRECT_i64:
-  case WebAssembly::CALL_INDIRECT_i64_S:
-  case WebAssembly::CALL_INDIRECT_f32:
-  case WebAssembly::CALL_INDIRECT_f32_S:
-  case WebAssembly::CALL_INDIRECT_f64:
-  case WebAssembly::CALL_INDIRECT_f64_S:
-  case WebAssembly::CALL_INDIRECT_v16i8:
-  case WebAssembly::CALL_INDIRECT_v16i8_S:
-  case WebAssembly::CALL_INDIRECT_v8i16:
-  case WebAssembly::CALL_INDIRECT_v8i16_S:
-  case WebAssembly::CALL_INDIRECT_v4i32:
-  case WebAssembly::CALL_INDIRECT_v4i32_S:
-  case WebAssembly::CALL_INDIRECT_v2i64:
-  case WebAssembly::CALL_INDIRECT_v2i64_S:
-  case WebAssembly::CALL_INDIRECT_v4f32:
-  case WebAssembly::CALL_INDIRECT_v4f32_S:
-  case WebAssembly::CALL_INDIRECT_v2f64:
-  case WebAssembly::CALL_INDIRECT_v2f64_S:
-  case WebAssembly::CALL_INDIRECT_ExceptRef:
-  case WebAssembly::CALL_INDIRECT_ExceptRef_S:
-    return 1;
-  default:
-    llvm_unreachable("Not a call instruction");
-  }
-}
-
-bool WebAssembly::isMarker(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  case WebAssembly::BLOCK:
-  case WebAssembly::BLOCK_S:
-  case WebAssembly::END_BLOCK:
-  case WebAssembly::END_BLOCK_S:
-  case WebAssembly::LOOP:
-  case WebAssembly::LOOP_S:
-  case WebAssembly::END_LOOP:
-  case WebAssembly::END_LOOP_S:
-  case WebAssembly::TRY:
-  case WebAssembly::TRY_S:
-  case WebAssembly::END_TRY:
-  case WebAssembly::END_TRY_S:
-    return true;
-  default:
-    return false;
-  }
-}
-
 bool WebAssembly::mayThrow(const MachineInstr &MI) {
   switch (MI.getOpcode()) {
   case WebAssembly::THROW:
@@ -260,12 +45,12 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) {
   case WebAssembly::RETHROW_S:
     return true;
   }
-  if (isCallIndirect(MI))
+  if (isCallIndirect(MI.getOpcode()))
     return true;
   if (!MI.isCall())
     return false;
 
-  const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI));
+  const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode()));
   assert(MO.isGlobal());
   const auto *F = dyn_cast<Function>(MO.getGlobal());
   if (!F)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
index f80b49662ea64..26cf84de89b92 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -23,19 +23,9 @@ class WebAssemblyFunctionInfo;
 
 namespace WebAssembly {
 
-bool isArgument(const MachineInstr &MI);
-bool isCopy(const MachineInstr &MI);
-bool isTee(const MachineInstr &MI);
 bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
-bool isCallDirect(const MachineInstr &MI);
-bool isCallIndirect(const MachineInstr &MI);
-bool isMarker(const MachineInstr &MI);
 bool mayThrow(const MachineInstr &MI);
 
-/// Returns the operand number of a callee, assuming the argument is a call
-/// instruction.
-unsigned getCalleeOpNo(const MachineInstr &MI);
-
 // Exception-related function names
 extern const char *const ClangCallTerminateFn;
 extern const char *const CxaBeginCatchFn;

From ec2abbafda627963e600c890ddf06ec3b7a1b399 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Fri, 12 Jul 2019 22:11:43 +0000
Subject: [PATCH 003/451] [DirectoryWatcher][linux] Fix use of uninitialized
 value

llvm-svn: 365966
---
 clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
index 986ebc5d95fab..87d133f46d8e8 100644
--- a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
+++ b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
@@ -220,8 +220,8 @@ void DirectoryWatcherLinux::InotifyPollingLoop() {
 
     // Multiple epoll_events can be received for a single file descriptor per
     // epoll_wait call.
-    for (const auto &EpollEvent : EpollEventBuffer) {
-      if (EpollEvent.data.fd == InotifyPollingStopSignal.FDRead) {
+    for (int i = 0; i < EpollWaitResult; ++i) {
+      if (EpollEventBuffer[i].data.fd == InotifyPollingStopSignal.FDRead) {
         StopWork();
         return;
       }

From b131ad0be2849fe367eac4ad7cc6eca198a08a28 Mon Sep 17 00:00:00 2001
From: Julie Hockett <juliehockett@google.com>
Date: Fri, 12 Jul 2019 22:19:02 +0000
Subject: [PATCH 004/451] [clang-doc] Fix failing tests on Windows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tests on Windows were failing due to path separator differences.
'/' was being used as the separator in the expected output. Paths in the
expected output are now converted to their native form before being compared
with the actual output.

Committed on behalf of Diego Astiazarán (diegoaat97@gmail.com).

Differential Revision: https://reviews.llvm.org/D64669

llvm-svn: 365967
---
 .../unittests/clang-doc/HTMLGeneratorTest.cpp | 31 ++++++++++++++-----
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp b/clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp
index e95ceb878a68a..eabc6d45fe581 100644
--- a/clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp
+++ b/clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp
@@ -79,10 +79,11 @@ TEST(HTMLGeneratorTest, emitRecordHTML) {
   I.DefLoc = Location(10, llvm::SmallString<16>{"test.cpp"});
   I.Loc.emplace_back(12, llvm::SmallString<16>{"test.cpp"});
 
+  SmallString<16> PathTo;
+  llvm::sys::path::native("path/to", PathTo);
   I.Members.emplace_back("int", "X/Y", "X", AccessSpecifier::AS_private);
   I.TagType = TagTypeKind::TTK_Class;
-  I.Parents.emplace_back(EmptySID, "F", InfoType::IT_record,
-                         llvm::SmallString<128>("path/to"));
+  I.Parents.emplace_back(EmptySID, "F", InfoType::IT_record, PathTo);
   I.VirtualParents.emplace_back(EmptySID, "G", InfoType::IT_record);
 
   I.ChildRecords.emplace_back(EmptySID, "ChildStruct", InfoType::IT_record);
@@ -97,6 +98,10 @@ TEST(HTMLGeneratorTest, emitRecordHTML) {
   llvm::raw_string_ostream Actual(Buffer);
   auto Err = G->generateDocForInfo(&I, Actual);
   assert(!Err);
+  SmallString<16> PathToF;
+  llvm::sys::path::native("../../../path/to/F.html", PathToF);
+  SmallString<16> PathToInt;
+  llvm::sys::path::native("../int.html", PathToInt);
   std::string Expected = R"raw(<!DOCTYPE html>
 <meta charset="utf-8"/>
 <title>class r</title>
@@ -107,12 +112,14 @@ TEST(HTMLGeneratorTest, emitRecordHTML) {
   </p>
   <p>
     Inherits from 
-    <a href="../../../path/to/F.html">F</a>
+    <a href=")raw" + std::string(PathToF.str()) +
+                         R"raw(">F</a>
     , G
   </p>
   <h2>Members</h2>
   <ul>
-    <li>private <a href="../int.html">int</a> X</li>
+    <li>private <a href=")raw" +
+                         std::string(PathToInt.str()) + R"raw(">int</a> X</li>
   </ul>
   <h2>Records</h2>
   <ul>
@@ -143,8 +150,10 @@ TEST(HTMLGeneratorTest, emitFunctionHTML) {
   I.DefLoc = Location(10, llvm::SmallString<16>{"test.cpp"});
   I.Loc.emplace_back(12, llvm::SmallString<16>{"test.cpp"});
 
-  I.ReturnType = TypeInfo(EmptySID, "float", InfoType::IT_default, "path/to");
-  I.Params.emplace_back("int", "path/to", "P");
+  SmallString<16> PathTo;
+  llvm::sys::path::native("path/to", PathTo);
+  I.ReturnType = TypeInfo(EmptySID, "float", InfoType::IT_default, PathTo);
+  I.Params.emplace_back("int", PathTo, "P");
   I.IsMethod = true;
   I.Parent = Reference(EmptySID, "Parent", InfoType::IT_record);
 
@@ -154,15 +163,21 @@ TEST(HTMLGeneratorTest, emitFunctionHTML) {
   llvm::raw_string_ostream Actual(Buffer);
   auto Err = G->generateDocForInfo(&I, Actual);
   assert(!Err);
+  SmallString<16> PathToFloat;
+  llvm::sys::path::native("path/to/float.html", PathToFloat);
+  SmallString<16> PathToInt;
+  llvm::sys::path::native("path/to/int.html", PathToInt);
   std::string Expected = R"raw(<!DOCTYPE html>
 <meta charset="utf-8"/>
 <title></title>
 <div>
   <h3>f</h3>
   <p>
-    <a href="path/to/float.html">float</a>
+    <a href=")raw" + std::string(PathToFloat.str()) +
+                         R"raw(">float</a>
      f(
-    <a href="path/to/int.html">int</a>
+    <a href=")raw" + std::string(PathToInt.str()) +
+                         R"raw(">int</a>
      P)
   </p>
   <p>

From 000ba715ddbd2a7af17534105f8a0916d4168c3e Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Fri, 12 Jul 2019 22:25:17 +0000
Subject: [PATCH 005/451] [DirectoryWatcher][NFC] Silence warnings in release
 build

llvm-svn: 365968
---
 .../linux/DirectoryWatcher-linux.cpp                |  5 ++++-
 .../DirectoryWatcher/DirectoryWatcherTest.cpp       | 13 ++++++++++---
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
index 87d133f46d8e8..0c9f799b638d6 100644
--- a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
+++ b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
@@ -55,7 +55,10 @@ struct SemaphorePipe {
   };
 
   void signal() {
-    ssize_t Result = llvm::sys::RetryAfterSignal(-1, write, FDWrite, "A", 1);
+#ifndef NDEBUG
+    ssize_t Result =
+#endif
+    llvm::sys::RetryAfterSignal(-1, write, FDWrite, "A", 1);
     assert(Result != -1);
   }
   ~SemaphorePipe() {
diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
index a2c50fc7d000f..0808ff47dee89 100644
--- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
+++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
@@ -38,12 +38,18 @@ struct DirectoryWatcherTestFixture {
 
   DirectoryWatcherTestFixture() {
     SmallString<128> pathBuf;
-    std::error_code UniqDirRes = createUniqueDirectory("dirwatcher", pathBuf);
+#ifndef NDEBUG
+    std::error_code UniqDirRes =
+#endif
+    createUniqueDirectory("dirwatcher", pathBuf);
     assert(!UniqDirRes);
     TestRootDir = pathBuf.str();
     path::append(pathBuf, "watch");
     TestWatchedDir = pathBuf.str();
-    std::error_code CreateDirRes = create_directory(TestWatchedDir, false);
+#ifndef NDEBUG
+    std::error_code CreateDirRes =
+#endif
+    create_directory(TestWatchedDir, false);
     assert(!CreateDirRes);
   }
 
@@ -415,8 +421,9 @@ TEST(DirectoryWatcherTest, ChangeMetadata) {
     const int FD = HopefullyTheFD.get();
     const TimePoint<> NewTimePt =
         std::chrono::system_clock::now() - std::chrono::minutes(1);
-
+#ifndef NDEBUG
     std::error_code setTimeRes =
+#endif
         llvm::sys::fs::setLastAccessAndModificationTime(FD, NewTimePt,
                                                         NewTimePt);
     assert(!setTimeRes);

From 9178b10163f758cbf8a5290ea6a827990427ddc0 Mon Sep 17 00:00:00 2001
From: Alex Lorenz <arphaman@gmail.com>
Date: Fri, 12 Jul 2019 22:29:44 +0000
Subject: [PATCH 006/451] NFC: utils/perf-training: Python 3 compatibility for
 lit.cfg

The output of subprocess.check_output is now bytes. We need to decode it.

llvm-svn: 365969
---
 clang/utils/perf-training/lit.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/utils/perf-training/lit.cfg b/clang/utils/perf-training/lit.cfg
index 671d44f83b948..be822d66e38ce 100644
--- a/clang/utils/perf-training/lit.cfg
+++ b/clang/utils/perf-training/lit.cfg
@@ -10,7 +10,7 @@ def getSysrootFlagsOnDarwin(config, lit_config):
     # default system root path.
     if 'darwin' in config.target_triple:
         try:
-            out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip()
+            out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip().decode()
             res = 0
         except OSError:
             res = -1

From db101864bdc938deb1d63fe4f7da761bd38e5cae Mon Sep 17 00:00:00 2001
From: Alina Sbirlea <asbirlea@google.com>
Date: Fri, 12 Jul 2019 22:30:30 +0000
Subject: [PATCH 007/451] [MemorySSA] Use SetVector to avoid nondeterminism.

Summary:
Use a SetVector for DeadBlockSet.
Resolves PR42574.

Reviewers: george.burgess.iv, uabelho, dblaikie

Subscribers: jlebar, Prazek, mgrang, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64601

llvm-svn: 365970
---
 llvm/include/llvm/Analysis/MemorySSAUpdater.h |   3 +-
 llvm/lib/Analysis/MemorySSAUpdater.cpp        |   2 +-
 .../lib/Transforms/Scalar/LoopSimplifyCFG.cpp |   4 +-
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |   6 +-
 llvm/lib/Transforms/Utils/Local.cpp           |   2 +-
 llvm/lib/Transforms/Utils/LoopSimplify.cpp    |   3 +-
 .../test/Analysis/MemorySSA/nondeterminism.ll | 122 ++++++++++++++++++
 7 files changed, 133 insertions(+), 9 deletions(-)
 create mode 100644 llvm/test/Analysis/MemorySSA/nondeterminism.ll

diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index 6467d41cc0bf7..d4d8040c1ff66 100644
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -31,6 +31,7 @@
 #ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H
 #define LLVM_ANALYSIS_MEMORYSSAUPDATER_H
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -243,7 +244,7 @@ class MemorySSAUpdater {
   /// Deleted blocks still have successor info, but their predecessor edges and
   /// Phi nodes may already be updated. Instructions in DeadBlocks should be
   /// deleted after this call.
-  void removeBlocks(const SmallPtrSetImpl<BasicBlock *> &DeadBlocks);
+  void removeBlocks(const SmallSetVector<BasicBlock *, 8> &DeadBlocks);
 
   /// Instruction I will be changed to an unreachable. Remove all accesses in
   /// I's block that follow I (inclusive), and update the Phis in the blocks'
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index 19559a62eb9ea..4c1feee7fd9af 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -1247,7 +1247,7 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) {
 }
 
 void MemorySSAUpdater::removeBlocks(
-    const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
+    const SmallSetVector<BasicBlock *, 8> &DeadBlocks) {
   // First delete all uses of BB in MemoryPhis.
   for (BasicBlock *BB : DeadBlocks) {
     Instruction *TI = BB->getTerminator();
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index c650abb412d9b..046f4c8af492e 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -428,8 +428,8 @@ class ConstantTerminatorFoldingImpl {
   /// relevant updates to DT and LI.
   void deleteDeadLoopBlocks() {
     if (MSSAU) {
-      SmallPtrSet<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
-                                                     DeadLoopBlocks.end());
+      SmallSetVector<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(),
+                                                        DeadLoopBlocks.end());
       MSSAU->removeBlocks(DeadLoopBlocksSet);
     }
 
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 9715329223827..aeac6f548b32e 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1463,8 +1463,8 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
 
   // Remove all MemorySSA in the dead blocks
   if (MSSAU) {
-    SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
-                                               DeadBlocks.end());
+    SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
+                                                 DeadBlocks.end());
     MSSAU->removeBlocks(DeadBlockSet);
   }
 
@@ -1482,7 +1482,7 @@ static void deleteDeadBlocksFromLoop(Loop &L,
                                      MemorySSAUpdater *MSSAU) {
   // Find all the dead blocks tied to this loop, and remove them from their
   // successors.
-  SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
 
   // Start with loop/exit blocks and get a transitive closure of reachable dead
   // blocks.
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 91d33cb0f20ea..39b6b889f91c4 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -2238,7 +2238,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI,
   assert(Reachable.size() < F.size());
   NumRemoved += F.size()-Reachable.size();
 
-  SmallPtrSet<BasicBlock *, 16> DeadBlockSet;
+  SmallSetVector<BasicBlock *, 8> DeadBlockSet;
   for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) {
     auto *BB = &*I;
     if (Reachable.count(BB))
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 5ec12aafff05b..7e6da02d57077 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -681,7 +681,8 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
       }
       DT->eraseNode(ExitingBlock);
       if (MSSAU) {
-        SmallPtrSet<BasicBlock *, 1> ExitBlockSet{ExitingBlock};
+        SmallSetVector<BasicBlock *, 8> ExitBlockSet;
+        ExitBlockSet.insert(ExitingBlock);
         MSSAU->removeBlocks(ExitBlockSet);
       }
 
diff --git a/llvm/test/Analysis/MemorySSA/nondeterminism.ll b/llvm/test/Analysis/MemorySSA/nondeterminism.ll
new file mode 100644
index 0000000000000..0bb3df30b5878
--- /dev/null
+++ b/llvm/test/Analysis/MemorySSA/nondeterminism.ll
@@ -0,0 +1,122 @@
+; RUN: opt -simplifycfg -enable-mssa-loop-dependency -S --preserve-ll-uselistorder %s | FileCheck %s
+; REQUIRES: x86-registered-target
+; CHECK-LABEL: @n
+; CHECK: uselistorder i16 0, { 3, 2, 4, 1, 5, 0, 6 }
+
+; Note: test was added in an effort to ensure determinism when updating memoryssa. See PR42574.
+; If the uselistorder check becomes no longer relevant, the test can be disabled or removed.
+
+%rec9 = type { i16, i32, i32 }
+
+@a = global [1 x [1 x %rec9]] zeroinitializer
+
+define i16 @n() {
+  br label %..split_crit_edge
+
+..split_crit_edge:                                ; preds = %0
+  br label %.split
+
+bb4.us4:                                          ; preds = %bb2.split.us32, %bb6.us28
+  %i.4.01.us5 = phi i16 [ %_tmp49.us30, %bb6.us28 ]
+  br label %g.exit4.us21
+
+bb1.i.us14:                                       ; preds = %bb4.us4
+  br label %g.exit4.us21
+
+g.exit4.us21:                                     ; preds = %bb1.i.us14, %g.exit4.critedge.us9
+  %i.4.02.us22 = phi i16 [ %i.4.01.us5, %bb4.us4 ], [ %i.4.01.us5, %bb1.i.us14 ]
+  br label %bb6.us28
+
+bb5.us26:                                         ; preds = %g.exit4.us21
+  br label %bb6.us28
+
+bb6.us28:                                         ; preds = %bb5.us26, %g.exit4.us21
+  %i.4.03.us29 = phi i16 [ %i.4.02.us22, %bb5.us26 ], [ %i.4.02.us22, %g.exit4.us21 ]
+  %_tmp49.us30 = add nuw nsw i16 %i.4.03.us29, 1
+  br label %bb4.us4
+
+bb4.us.us:                                        ; preds = %bb2.split.us.us, %bb6.us.us
+  %i.4.01.us.us = phi i16  [ %_tmp49.us.us, %bb6.us.us ]
+  br label %bb1.i.us.us
+
+bb1.i.us.us:                                      ; preds = %bb4.us.us
+  br label %g.exit4.us.us
+
+g.exit4.us.us:                                    ; preds = %bb1.i.us.us, %g.exit4.critedge.us.us
+  %i.4.02.us.us = phi i16 [ %i.4.01.us.us, %bb1.i.us.us ]
+  br label %bb5.us.us
+
+bb5.us.us:                                        ; preds = %g.exit4.us.us
+  br label %bb6.us.us
+
+bb6.us.us:                                        ; preds = %bb5.us.us, %g.exit4.us.us
+  %i.4.03.us.us = phi i16 [ %i.4.02.us.us, %bb5.us.us ]
+  %_tmp49.us.us = add nuw nsw i16 %i.4.03.us.us, 1
+  br label %bb4.us.us
+
+
+.split:                                           ; preds = %..split_crit_edge
+  br label %bb2
+
+bb2:                                              ; preds = %.split, %bb7
+  %h.3.0 = phi i16 [ undef, %.split ], [ %_tmp53, %bb7 ]
+  br label %bb2.bb2.split_crit_edge
+
+bb2.bb2.split_crit_edge:                          ; preds = %bb2
+  br label %bb2.split
+
+bb2.split.us:                                     ; preds = %bb2
+  br label %bb4.us
+
+bb4.us:                                           ; preds = %bb6.us, %bb2.split.us
+  %i.4.01.us = phi i16 [ 0, %bb2.split.us ]
+  br label %bb1.i.us
+
+g.exit4.critedge.us:                              ; preds = %bb4.us
+  br label %g.exit4.us
+
+bb1.i.us:                                         ; preds = %bb4.us
+  br label %g.exit4.us
+
+g.exit4.us:                                       ; preds = %bb1.i.us, %g.exit4.critedge.us
+  %i.4.02.us = phi i16 [ %i.4.01.us, %g.exit4.critedge.us ], [ %i.4.01.us, %bb1.i.us ]
+  br label %bb5.us
+
+bb5.us:                                           ; preds = %g.exit4.us
+  br label %bb7
+
+bb2.split:                                        ; preds = %bb2.bb2.split_crit_edge
+  br label %bb4
+
+bb4:                                              ; preds = %bb2.split, %bb6
+  %i.4.01 = phi i16 [ 0, %bb2.split ]
+  %_tmp16 = getelementptr [1 x [1 x %rec9]], [1 x [1 x %rec9]]* @a, i16 0, i16 %h.3.0, i16 %i.4.01, i32 0
+  %_tmp17 = load i16, i16* %_tmp16, align 1
+  br label %g.exit4.critedge
+
+bb1.i:                                            ; preds = %bb4
+  br label %g.exit4
+
+g.exit4.critedge:                                 ; preds = %bb4
+  %_tmp28.c = getelementptr [1 x [1 x %rec9]], [1 x [1 x %rec9]]* @a, i16 0, i16 %h.3.0, i16 %i.4.01, i32 1
+  %_tmp29.c = load i32, i32* %_tmp28.c, align 1
+  %_tmp30.c = trunc i32 %_tmp29.c to i16
+  br label %g.exit4
+
+g.exit4:                                          ; preds = %g.exit4.critedge, %bb1.i
+  %i.4.02 = phi i16 [ %i.4.01, %g.exit4.critedge ], [ %i.4.01, %bb1.i ]
+  %_tmp41 = getelementptr [1 x [1 x %rec9]], [1 x [1 x %rec9]]* @a, i16 0, i16 %h.3.0, i16 %i.4.02, i32 2
+  br label %bb6
+
+bb5:                                              ; preds = %g.exit4
+  br label %bb6
+
+bb6:                                              ; preds = %bb5, %g.exit4
+  %i.4.03 = phi i16 [ %i.4.02, %bb5 ], [ %i.4.02, %g.exit4 ]
+  %_tmp49 = add nuw nsw i16 %i.4.03, 1
+  br label %bb7
+
+bb7:                                              ; preds = %bb7.us-lcssa.us, %bb7.us-lcssa
+  %_tmp53 = add nsw i16 %h.3.0, 1
+  br label %bb2
+}

From b1bff76e22bd39eb46dcae49891fda1cf1cc0bd5 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Fri, 12 Jul 2019 22:37:55 +0000
Subject: [PATCH 008/451] isBytewiseValue checks ConstantVector element by
 element

Summary: A vector of the same value with a few undefs will still be considered "Bytewise"

Reviewers: eugenis, pcc, jfb

Reviewed By: jfb

Subscribers: dexonsmith, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64031

llvm-svn: 365971
---
 llvm/lib/Analysis/ValueTracking.cpp           | 7 +------
 llvm/unittests/Analysis/ValueTrackingTest.cpp | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index cf8fa9cee3fc8..ad8034b2d7bc5 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3253,12 +3253,7 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
     return Val;
   }
 
-  if (isa<ConstantVector>(C)) {
-    Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
-    return Splat ? isBytewiseValue(Splat, DL) : nullptr;
-  }
-
-  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+  if (isa<ConstantAggregate>(C)) {
     Value *Val = UndefInt8;
     for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
       if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL))))
diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp
index f4316cc8575c4..96b41d93d568a 100644
--- a/llvm/unittests/Analysis/ValueTrackingTest.cpp
+++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp
@@ -878,7 +878,7 @@ const std::pair<const char *, const char *> IsBytewiseValueTests[] = {
         "<4 x i8> <i8 1, i8 1, i8 2, i8 1>",
     },
     {
-        "",
+        "i8 5",
         "<2 x i8> < i8 5, i8 undef >",
     },
     {

From 1dfae6fe505ffedf97e9f36d207cb8bbdc9255d8 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Fri, 12 Jul 2019 22:42:01 +0000
Subject: [PATCH 009/451] [AMDGPU] use v32f32 for 3 mfma intrinsics

These should really use v32f32, but were defined as v32i32
due to the lack of the v32f32 type.

Differential Revision: https://reviews.llvm.org/D64667

llvm-svn: 365972
---
 llvm/include/llvm/IR/Intrinsics.td            |  1 +
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      | 12 ++--
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  9 +++
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  6 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |  7 +-
 llvm/lib/Target/AMDGPU/SIInstructions.td      | 12 ++++
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |  8 +--
 .../CodeGen/AMDGPU/agpr-register-count.ll     |  8 +--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll  | 64 +++++++++----------
 llvm/test/CodeGen/AMDGPU/spill-agpr.ll        | 10 +--
 .../test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll | 10 ++-
 11 files changed, 87 insertions(+), 60 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 62e94108a7355..8276d7535c3b2 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -261,6 +261,7 @@ def llvm_v2f32_ty      : LLVMType<v2f32>;    //  2 x float
 def llvm_v4f32_ty      : LLVMType<v4f32>;    //  4 x float
 def llvm_v8f32_ty      : LLVMType<v8f32>;    //  8 x float
 def llvm_v16f32_ty     : LLVMType<v16f32>;   // 16 x float
+def llvm_v32f32_ty     : LLVMType<v32f32>;   // 32 x float
 def llvm_v1f64_ty      : LLVMType<v1f64>;    //  1 x double
 def llvm_v2f64_ty      : LLVMType<v2f64>;    //  2 x double
 def llvm_v4f64_ty      : LLVMType<v4f64>;    //  4 x double
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 56878e1240749..43e827ec6ab99 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1663,8 +1663,8 @@ def int_amdgcn_buffer_atomic_fadd    : AMDGPUBufferAtomicNoRtn;
 def int_amdgcn_global_atomic_fadd    : AMDGPUGlobalAtomicNoRtn;
 
 // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
-def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32i32_ty],
-  [llvm_float_ty, llvm_float_ty, llvm_v32i32_ty,
+def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
+  [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
 
 def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty],
@@ -1683,8 +1683,8 @@ def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty],
   [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
 
-def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32i32_ty],
-  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32i32_ty,
+def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty],
+  [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
 
 def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty],
@@ -1723,8 +1723,8 @@ def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
 
-def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32i32_ty],
-  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32i32_ty,
+def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty],
+  [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
    llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
 
 def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty],
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 56922b0505064..14ae62968c65b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -165,6 +165,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
   AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
 
+  setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
+  AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);
+
   setOperationAction(ISD::LOAD, MVT::i64, Promote);
   AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
 
@@ -256,6 +259,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STORE, MVT::v16f32, Promote);
   AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
 
+  setOperationAction(ISD::STORE, MVT::v32f32, Promote);
+  AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);
+
   setOperationAction(ISD::STORE, MVT::i64, Promote);
   AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
 
@@ -355,7 +361,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
 
   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3eb1b1c91066c..b90a0d28e9ef0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -153,6 +153,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
   if (Subtarget->hasMAIInsts()) {
     addRegisterClass(MVT::v32i32, &AMDGPU::AReg_1024RegClass);
+    addRegisterClass(MVT::v32f32, &AMDGPU::AReg_1024RegClass);
   }
 
   computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -263,8 +264,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
   // We only support LOAD/STORE and vector manipulation ops for vectors
   // with > 4 elements.
-  for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
-        MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) {
+  for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
+                  MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
+                  MVT::v32i32, MVT::v32f32 }) {
     for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
       switch (Op) {
       case ISD::LOAD:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 98928f00a4568..c382c816e0b40 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2178,14 +2178,13 @@ def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
 
 def VOP_V4F32_F32_F32_V4F32       : VOPProfile <[v4f32,  f32,   f32,   v4f32]>;
 def VOP_V16F32_F32_F32_V16F32     : VOPProfile <[v16f32, f32,   f32,   v16f32]>;
-// TODO: define v32f32
-def VOP_V32F32_F32_F32_V32F32     : VOPProfile <[v32i32, f32,   f32,   v32i32]>;
+def VOP_V32F32_F32_F32_V32F32     : VOPProfile <[v32f32, f32,   f32,   v32f32]>;
 def VOP_V4F32_V4F16_V4F16_V4F32   : VOPProfile <[v4f32,  v4f16, v4f16, v4f32]>;
 def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
-def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32i32, v4f16, v4f16, v32i32]>;
+def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
 def VOP_V4F32_V2I16_V2I16_V4F32   : VOPProfile <[v4f32,  v2i16, v2i16, v4f32]>;
 def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
-def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32i32, v2i16, v2i16, v32i32]>;
+def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
 def VOP_V4I32_I32_I32_V4I32       : VOPProfile <[v4i32,  i32,   i32,   v4i32]>;
 def VOP_V16I32_I32_I32_V16I32     : VOPProfile <[v16i32, i32,   i32,   v16i32]>;
 def VOP_V32I32_I32_I32_V32I32     : VOPProfile <[v32i32, i32,   i32,   v32i32]>;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index fd4b6f5e3e31d..70f20bb693704 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -942,6 +942,14 @@ foreach Index = 0-31 in {
   def Insert_Element_v32i32_#Index : Insert_Element <
     i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
+
+  def Extract_Element_v32f32_#Index : Extract_Element <
+    f32, v32f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
+
+  def Insert_Element_v32f32_#Index : Insert_Element <
+    f32, v32f32, Index, !cast<SubRegIndex>(sub#Index)
+  >;
 }
 
 // FIXME: Why do only some of these type combinations for SReg and
@@ -1034,6 +1042,10 @@ def : BitConvert <v8f32, v8i32, VReg_256>;
 def : BitConvert <v16i32, v16f32, VReg_512>;
 def : BitConvert <v16f32, v16i32, VReg_512>;
 
+// 1024-bit bitcast
+def : BitConvert <v32i32, v32f32, VReg_1024>;
+def : BitConvert <v32f32, v32i32, VReg_1024>;
+
 /********** =================== **********/
 /********** Src & Dst modifiers **********/
 /********** =================== **********/
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 14d41d84cbadc..4767f3c30ed32 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -757,11 +757,11 @@ def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 3
   let isAllocatable = 0;
 }
 
-def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add SGPR_1024Regs)> {
+def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add SGPR_1024Regs)> {
   let AllocationPriority = 19;
 }
 
-def SReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32,
+def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
   (add SGPR_1024)> {
   let CopyCost = 16;
   let AllocationPriority = 19;
@@ -812,7 +812,7 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
   let AllocationPriority = 7;
 }
 
-def VReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add VGPR_1024)> {
+def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add VGPR_1024)> {
   let Size = 1024;
   let CopyCost = 32;
   let AllocationPriority = 8;
@@ -840,7 +840,7 @@ def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add AGPR_512)> {
 }
 
 // TODO: add v32f32 value type
-def AReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add AGPR_1024)> {
+def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add AGPR_1024)> {
   let Size = 1024;
   let CopyCost = 65;
   let AllocationPriority = 8;
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index ab4fcc54f65c8..dfedd2402a03f 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -1,15 +1,15 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32)
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
 
 ; GCN-LABEL: {{^}}test_32_agprs:
 ; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}}, 0
 ; GCN-NOT: v28
 ; GCN: NumVgprs: 32
 ; GCN: VGPRBlocks: 7
-define amdgpu_kernel void @test_32_agprs(<32 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @test_32_agprs(<32 x float> addrspace(1)* %arg) {
 bb:
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 0, i32 0, i32 0)
-  store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
+  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 0ce08777c14b5..5ac03632fbbe0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -1,11 +1,11 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32)
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
 declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float, float, <16 x float>, i32, i32, i32)
 declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float, float, <4 x float>, i32, i32, i32)
-declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x i32>, i32, i32, i32)
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half>, <4 x half>, <16 x float>, i32, i32, i32)
 declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half>, <4 x half>, <4 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half>, <4 x half>, <16 x float>, i32, i32, i32)
@@ -15,7 +15,7 @@ declare <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32, i32, <16 x i32>, i32, i3
 declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
 declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32, i32, <16 x i32>, i32, i32, i32)
 declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32, i32, <4 x i32>, i32, i32, i32)
-declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x i32>, i32, i32, i32)
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16>, <2 x i16>, <16 x float>, i32, i32, i32)
 declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16>, <2 x i16>, <4 x float>, i32, i32, i32)
 declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16>, <2 x i16>, <16 x float>, i32, i32, i32)
@@ -100,11 +100,11 @@ declare i32 @llvm.amdgcn.workitem.id.x()
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32(<32 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32(<32 x float> addrspace(1)* %arg) {
 bb:
-  %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> %in.1, i32 1, i32 2, i32 3)
-  store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg
+  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3)
+  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
   ret void
 }
 
@@ -326,14 +326,14 @@ bb:
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f32_32x32x4f16(<32 x i32> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) {
+define amdgpu_kernel void @test_mfma_f32_32x32x4f16(<32 x float> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) {
 bb:
-  %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg
+  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
   %c.1 = load <4 x half>, <4 x half> addrspace(1)* %c
   %c2p = getelementptr <4 x half>, <4 x half> addrspace(1)* %c, i64 1
   %c.2 = load <4 x half>, <4 x half> addrspace(1)* %c2p
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %c.1, <4 x half> %c.2, <32 x i32> %in.1, i32 1, i32 2, i32 3)
-  store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %c.1, <4 x half> %c.2, <32 x float> %in.1, i32 1, i32 2, i32 3)
+  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
   ret void
 }
 
@@ -794,13 +794,13 @@ bb:
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(<32 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(<32 x float> addrspace(1)* %arg) {
 bb:
-  %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg
+  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
   %a = bitcast i32 1 to <2 x i16>
   %b = bitcast i32 2 to <2 x i16>
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x i32> %in.1, i32 1, i32 2, i32 3)
-  store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x float> %in.1, i32 1, i32 2, i32 3)
+  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
   ret void
 }
 
@@ -957,12 +957,12 @@ bb:
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_forward_acc:
 ; GCN:      v_mfma_f32_32x32x1f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}]
 ; GCN-NEXT: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]]
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(<32 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(<32 x float> addrspace(1)* %arg) {
 bb:
-  %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> %in.1, i32 0, i32 0, i32 0)
-  %mai.2 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> %mai.1, i32 0, i32 0, i32 0)
-  store <32 x i32> %mai.2, <32 x i32> addrspace(1)* %arg
+  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %mai.1, i32 0, i32 0, i32 0)
+  store <32 x float> %mai.2, <32 x float> addrspace(1)* %arg
   ret void
 }
 
@@ -1112,10 +1112,10 @@ bb:
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(<32 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(<32 x float> addrspace(1)* %arg) {
 bb:
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 0, i32 0, i32 0)
-  store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
+  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
   ret void
 }
 
@@ -1184,7 +1184,7 @@ bb:
 
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_imm:
 ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0
-; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1
+; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0
 ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0
 ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0
 ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0
@@ -1256,10 +1256,10 @@ bb:
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(<32 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(<32 x float> addrspace(1)* %arg) {
 bb:
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> <i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 0, i32 0, i32 0)
-  store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> <float 1.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, i32 0, i32 0, i32 0)
+  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
   ret void
 }
 
@@ -1350,12 +1350,12 @@ bb:
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
 ; GCN-DAG: global_store_dwordx4
-define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(<32 x i32> addrspace(1)* %arg) {
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(<32 x float> addrspace(1)* %arg) {
 bb:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep = getelementptr inbounds <32 x i32>, <32 x i32> addrspace(1)* %arg, i32 %tid
-  %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %gep
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> %in.1, i32 1, i32 2, i32 3)
-  store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %gep
+  %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid
+  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3)
+  store <32 x float> %mai.1, <32 x float> addrspace(1)* %gep
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
index b12a7bc72a819..9c7279a78e75e 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
@@ -84,23 +84,23 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #1 {
 ; A2M:        buffer_load_dword v[[VSPILL:[0-9]+]], off, s[{{[0-9:]+}}], s{{[0-9]+}} offset:[[FI]] ; 4-byte Folded Reload
 ; GFX908:     v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]]
 ; A2V:        ScratchSize: 0
-define amdgpu_kernel void @max_32regs_mfma32(i32 addrspace(1)* %arg) #3 {
+define amdgpu_kernel void @max_32regs_mfma32(float addrspace(1)* %arg) #3 {
 bb:
   %v = call i32 asm sideeffect "", "=a"()
   br label %use
 
 use:
-  %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 1.0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 2>, i32 0, i32 0, i32 0)
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 1.0, <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 2.0>, i32 0, i32 0, i32 0)
   call void asm sideeffect "", "a"(i32 %v)
-  %elt1 = extractelement <32 x i32> %mai.1, i32 0
-  store i32 %elt1, i32 addrspace(1)* %arg
+  %elt1 = extractelement <32 x float> %mai.1, i32 0
+  store float %elt1, float addrspace(1)* %arg
   ret void
 }
 
 declare i32 @llvm.amdgcn.workitem.id.x()
 declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
 declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
-declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32)
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
 
 attributes #0 = { nounwind "amdgpu-num-vgpr"="24" }
 attributes #1 = { nounwind "amdgpu-num-vgpr"="8" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
index b101e41833b8e..6eef782d1906d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
@@ -233,19 +233,23 @@ define amdgpu_kernel void @max_256_vgprs_spill_9x32(<32 x float> addrspace(1)* %
   ret void
 }
 
+; FIXME: adding an AReg_1024 register class for v32f32 and v32i32
+;        produces unnecessary copies and we still have some amount
+;        of conventional spilling.
+
 ; GCN-LABEL: {{^}}max_256_vgprs_spill_9x32_2bb:
 ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
 ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
-; GFX908-NOT: SCRATCH_RSRC
+; GFX908-FIXME-NOT: SCRATCH_RSRC
 ; GFX908-DAG: v_accvgpr_write_b32 a0, v
 ; GFX900:     buffer_store_dword v
 ; GFX900:     buffer_load_dword v
-; GFX908-NOT: buffer_
+; GFX908-FIXME-NOT: buffer_
 ; GFX908-DAG  v_accvgpr_read_b32
 
 ; GCN:    NumVgprs: 256
 ; GFX900: ScratchSize: 580
-; GFX908: ScratchSize: 0
+; GFX908-FIXME: ScratchSize: 0
 ; GCN:    VGPRBlocks: 63
 ; GCN:    NumVGPRsForWavesPerEU: 256
 define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb(<32 x float> addrspace(1)* %p) {

From 882fdf68b74d3199cb84b062709b702ed610f547 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Fri, 12 Jul 2019 23:01:48 +0000
Subject: [PATCH 010/451] Fix non-conformance in `std::tuple`.

Previously we implemented all one trillion tuple-like constructors using
a single generic overload. This worked fairly well, except that it
differed in behavior from the standard version because it didn't
consider both T&& and T const&. This was observable for certain
types.

This patch addresses that issue by splitting the generic constructor
in two. We now provide both T&& and T const& versions of the
tuple-like constructors (sort of).

llvm-svn: 365973
---
 libcxx/include/tuple                          | 51 +++++++++++--------
 .../tuple.cnstr/convert_copy.pass.cpp         | 16 ++++++
 2 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/libcxx/include/tuple b/libcxx/include/tuple
index de30e86c72b48..031d25a9854fc 100644
--- a/libcxx/include/tuple
+++ b/libcxx/include/tuple
@@ -601,6 +601,25 @@ class _LIBCPP_TEMPLATE_VIS tuple
         }
     };
 
+    template <class _Tuple, bool _DisableIfLValue>
+    using _EnableImplicitTupleLikeConstructor = _EnableIf<
+                         _CheckTupleLikeConstructor<
+                             __tuple_like_with_size<_Tuple, sizeof...(_Tp)>::value
+                             && !_PackExpandsToThisTuple<_Tuple>::value
+                             && (!is_lvalue_reference<_Tuple>::value || !_DisableIfLValue)
+                         >::template __enable_implicit<_Tuple>(),
+                         bool
+                      >;
+
+    template <class _Tuple, bool _DisableIfLValue>
+    using _EnableExplicitTupleLikeConstructor = _EnableIf<
+                         _CheckTupleLikeConstructor<
+                             __tuple_like_with_size<_Tuple, sizeof...(_Tp)>::value
+                             && !_PackExpandsToThisTuple<_Tuple>::value
+                             && (!is_lvalue_reference<_Tuple>::value || !_DisableIfLValue)
+                         >::template __enable_explicit<_Tuple>(),
+                         bool
+                      >;
     template <size_t _Jp, class ..._Up> friend _LIBCPP_CONSTEXPR_AFTER_CXX11
         typename tuple_element<_Jp, tuple<_Up...> >::type& get(tuple<_Up...>&) _NOEXCEPT;
     template <size_t _Jp, class ..._Up> friend _LIBCPP_CONSTEXPR_AFTER_CXX11
@@ -815,35 +834,27 @@ public:
                     typename __make_tuple_types<tuple, sizeof...(_Tp), sizeof...(_Up)>::type(),
                     _VSTD::forward<_Up>(__u)...) {}
 
-    template <class _Tuple,
-              typename enable_if
-                      <
-                         _CheckTupleLikeConstructor<
-                             __tuple_like_with_size<_Tuple, sizeof...(_Tp)>::value
-                             && !_PackExpandsToThisTuple<_Tuple>::value
-                         >::template __enable_implicit<_Tuple>(),
-                         bool
-                      >::type = false
-             >
+    template <class _Tuple, _EnableImplicitTupleLikeConstructor<_Tuple, true> = false>
         _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
         tuple(_Tuple&& __t) _NOEXCEPT_((is_nothrow_constructible<_BaseT, _Tuple>::value))
             : __base_(_VSTD::forward<_Tuple>(__t)) {}
 
-    template <class _Tuple,
-              typename enable_if
-                      <
-                         _CheckTupleLikeConstructor<
-                             __tuple_like_with_size<_Tuple, sizeof...(_Tp)>::value
-                             && !_PackExpandsToThisTuple<_Tuple>::value
-                         >::template __enable_explicit<_Tuple>(),
-                         bool
-                      >::type = false
-             >
+    template <class _Tuple, _EnableImplicitTupleLikeConstructor<const _Tuple&, false> = false>
+        _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+        tuple(const _Tuple& __t) _NOEXCEPT_((is_nothrow_constructible<_BaseT, const _Tuple&>::value))
+            : __base_(__t) {}
+    template <class _Tuple, _EnableExplicitTupleLikeConstructor<_Tuple, true> = false>
         _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
         explicit
         tuple(_Tuple&& __t) _NOEXCEPT_((is_nothrow_constructible<_BaseT, _Tuple>::value))
             : __base_(_VSTD::forward<_Tuple>(__t)) {}
 
+    template <class _Tuple, _EnableExplicitTupleLikeConstructor<const _Tuple&, false> = false>
+        _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11
+        explicit
+        tuple(const _Tuple& __t) _NOEXCEPT_((is_nothrow_constructible<_BaseT, const _Tuple&>::value))
+            : __base_(__t) {}
+
     template <class _Alloc, class _Tuple,
               typename enable_if
                       <
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/convert_copy.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/convert_copy.pass.cpp
index 41f73328ab7b0..89f67a227bd1f 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/convert_copy.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/convert_copy.pass.cpp
@@ -31,6 +31,15 @@ struct Implicit {
   Implicit(int x) : value(x) {}
 };
 
+struct ExplicitTwo {
+    ExplicitTwo() {}
+    ExplicitTwo(ExplicitTwo const&) {}
+    ExplicitTwo(ExplicitTwo &&) {}
+
+    template <class T, class = typename std::enable_if<!std::is_same<T, ExplicitTwo>::value>::type>
+    explicit ExplicitTwo(T) {}
+};
+
 struct B
 {
     int id_;
@@ -136,6 +145,13 @@ int main(int, char**)
         std::tuple<Implicit> t2 = t1;
         assert(std::get<0>(t2).value == 42);
     }
+    {
+        static_assert(std::is_convertible<ExplicitTwo&&, ExplicitTwo>::value, "");
+        static_assert(std::is_convertible<std::tuple<ExplicitTwo&&>&&, const std::tuple<ExplicitTwo>&>::value, "");
 
+        ExplicitTwo e;
+        std::tuple<ExplicitTwo> t = std::tuple<ExplicitTwo&&>(std::move(e));
+        ((void)t);
+    }
   return 0;
 }

From 51a52b58930cd1bb2351bf7017adfd55073f6553 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Fri, 12 Jul 2019 23:30:55 +0000
Subject: [PATCH 011/451] PDB HashTable: Move TraitsT from class parameter to
 the methods that need it

The traits object is only used by a few methods. Deserializing a hash
table and walking it is possible without the traits object, so it
shouldn't be required to build a dummy object for that use case.

The TraitsT object used to be a function template parameter before
r327647; this patch restores it to that state.

This makes it clear that the traits object isn't needed at all in 1 of
the current 3 uses of HashTable (and I am going to add another use that
doesn't need it), and that the default PdbHashTraits isn't used outside
of tests.

While here, also re-enable 3 checks in the test that were commented out
(which requires making HashTableInternals templated and giving FooBar
an operator==).

No intended behavior change.

Differential Revision: https://reviews.llvm.org/D64640

llvm-svn: 365974
---
 .../llvm/DebugInfo/PDB/Native/HashTable.h     |  63 +++++-----
 .../DebugInfo/PDB/Native/NamedStreamMap.h     |   2 +-
 .../DebugInfo/PDB/Native/PDBFileBuilder.h     |   2 +-
 .../DebugInfo/PDB/Native/NamedStreamMap.cpp   |   7 +-
 .../DebugInfo/PDB/Native/PDBFileBuilder.cpp   |   5 +-
 .../unittests/DebugInfo/PDB/HashTableTest.cpp | 119 ++++++++++--------
 6 files changed, 102 insertions(+), 96 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
index 86c43a482b820..b00873b575b20 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -31,21 +31,21 @@ namespace pdb {
 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
 
-template <typename ValueT, typename TraitsT> class HashTable;
+template <typename ValueT> class HashTable;
 
-template <typename ValueT, typename TraitsT>
+template <typename ValueT>
 class HashTableIterator
-    : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>,
+    : public iterator_facade_base<HashTableIterator<ValueT>,
                                   std::forward_iterator_tag,
                                   std::pair<uint32_t, ValueT>> {
-  friend HashTable<ValueT, TraitsT>;
+  friend HashTable<ValueT>;
 
-  HashTableIterator(const HashTable<ValueT, TraitsT> &Map, uint32_t Index,
+  HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
                     bool IsEnd)
       : Map(&Map), Index(Index), IsEnd(IsEnd) {}
 
 public:
-  HashTableIterator(const HashTable<ValueT, TraitsT> &Map) : Map(&Map) {
+  HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
     int I = Map.Present.find_first();
     if (I == -1) {
       Index = 0;
@@ -87,22 +87,14 @@ class HashTableIterator
   bool isEnd() const { return IsEnd; }
   uint32_t index() const { return Index; }
 
-  const HashTable<ValueT, TraitsT> *Map;
+  const HashTable<ValueT> *Map;
   uint32_t Index;
   bool IsEnd;
 };
 
-template <typename T> struct PdbHashTraits {};
-
-template <> struct PdbHashTraits<uint32_t> {
-  uint32_t hashLookupKey(uint32_t N) const { return N; }
-  uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
-  uint32_t lookupKeyToStorageKey(uint32_t N) { return N; }
-};
-
-template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>>
+template <typename ValueT>
 class HashTable {
-  using iterator = HashTableIterator<ValueT, TraitsT>;
+  using iterator = HashTableIterator<ValueT>;
   friend iterator;
 
   struct Header {
@@ -114,9 +106,7 @@ class HashTable {
 
 public:
   HashTable() { Buckets.resize(8); }
-
-  explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {}
-  HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) {
+  explicit HashTable(uint32_t Capacity) {
     Buckets.resize(Capacity);
   }
 
@@ -221,7 +211,8 @@ class HashTable {
 
   /// Find the entry whose key has the specified hash value, using the specified
   /// traits defining hash function and equality.
-  template <typename Key> iterator find_as(const Key &K) const {
+  template <typename Key, typename TraitsT>
+  iterator find_as(const Key &K, TraitsT &Traits) const {
     uint32_t H = Traits.hashLookupKey(K) % capacity();
     uint32_t I = H;
     Optional<uint32_t> FirstUnused;
@@ -252,12 +243,14 @@ class HashTable {
 
   /// Set the entry using a key type that the specified Traits can convert
   /// from a real key to an internal key.
-  template <typename Key> bool set_as(const Key &K, ValueT V) {
-    return set_as_internal(K, std::move(V), None);
+  template <typename Key, typename TraitsT>
+  bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
+    return set_as_internal(K, std::move(V), Traits, None);
   }
 
-  template <typename Key> ValueT get(const Key &K) const {
-    auto Iter = find_as(K);
+  template <typename Key, typename TraitsT>
+  ValueT get(const Key &K, TraitsT &Traits) const {
+    auto Iter = find_as(K, Traits);
     assert(Iter != end());
     return (*Iter).second;
   }
@@ -266,7 +259,6 @@ class HashTable {
   bool isPresent(uint32_t K) const { return Present.test(K); }
   bool isDeleted(uint32_t K) const { return Deleted.test(K); }
 
-  TraitsT Traits;
   BucketList Buckets;
   mutable SparseBitVector<> Present;
   mutable SparseBitVector<> Deleted;
@@ -274,9 +266,10 @@ class HashTable {
 private:
   /// Set the entry using a key type that the specified Traits can convert
   /// from a real key to an internal key.
-  template <typename Key>
-  bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) {
-    auto Entry = find_as(K);
+  template <typename Key, typename TraitsT>
+  bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
+                       Optional<uint32_t> InternalKey) {
+    auto Entry = find_as(K, Traits);
     if (Entry != end()) {
       assert(isPresent(Entry.index()));
       assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
@@ -293,15 +286,16 @@ class HashTable {
     Present.set(Entry.index());
     Deleted.reset(Entry.index());
 
-    grow();
+    grow(Traits);
 
-    assert((find_as(K)) != end());
+    assert((find_as(K, Traits)) != end());
     return true;
   }
 
   static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
 
-  void grow() {
+  template <typename TraitsT>
+  void grow(TraitsT &Traits) {
     uint32_t S = size();
     uint32_t MaxLoad = maxLoad(capacity());
     if (S < maxLoad(capacity()))
@@ -313,10 +307,11 @@ class HashTable {
     // Growing requires rebuilding the table and re-hashing every item.  Make a
     // copy with a larger capacity, insert everything into the copy, then swap
     // it in.
-    HashTable NewMap(NewCapacity, Traits);
+    HashTable NewMap(NewCapacity);
     for (auto I : Present) {
       auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
-      NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first);
+      NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
+                             Buckets[I].first);
     }
 
     Buckets.swap(NewMap.Buckets);
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
index c49d796356c7b..1df059ffa9fda 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -59,7 +59,7 @@ class NamedStreamMap {
   NamedStreamMapTraits HashTraits;
   /// Closed hash table from Offset -> StreamNumber, where Offset is the offset
   /// of the stream name in NamesBuffer.
-  HashTable<support::ulittle32_t, NamedStreamMapTraits> OffsetIndexMap;
+  HashTable<support::ulittle32_t> OffsetIndexMap;
 
   /// Buffer of string data.
   std::vector<char> NamesBuffer;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 72000bdc011ac..2abaa5f4cdc47 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -97,7 +97,7 @@ class PDBFileBuilder {
 
   PDBStringTableBuilder Strings;
   StringTableHashTraits InjectedSourceHashTraits;
-  HashTable<SrcHeaderBlockEntry, StringTableHashTraits> InjectedSourceTable;
+  HashTable<SrcHeaderBlockEntry> InjectedSourceTable;
 
   SmallVector<InjectedSourceDescriptor, 2> InjectedSources;
 
diff --git a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
index 1c044e0c26538..4a88391494cd2 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -46,8 +46,7 @@ uint32_t NamedStreamMapTraits::lookupKeyToStorageKey(StringRef S) {
   return NS->appendStringData(S);
 }
 
-NamedStreamMap::NamedStreamMap()
-    : HashTraits(*this), OffsetIndexMap(1, HashTraits) {}
+NamedStreamMap::NamedStreamMap() : HashTraits(*this), OffsetIndexMap(1) {}
 
 Error NamedStreamMap::load(BinaryStreamReader &Stream) {
   uint32_t StringBufferSize;
@@ -99,7 +98,7 @@ uint32_t NamedStreamMap::hashString(uint32_t Offset) const {
 }
 
 bool NamedStreamMap::get(StringRef Stream, uint32_t &StreamNo) const {
-  auto Iter = OffsetIndexMap.find_as(Stream);
+  auto Iter = OffsetIndexMap.find_as(Stream, HashTraits);
   if (Iter == OffsetIndexMap.end())
     return false;
   StreamNo = (*Iter).second;
@@ -123,5 +122,5 @@ uint32_t NamedStreamMap::appendStringData(StringRef S) {
 }
 
 void NamedStreamMap::set(StringRef Stream, uint32_t StreamNo) {
-  OffsetIndexMap.set_as(Stream, support::ulittle32_t(StreamNo));
+  OffsetIndexMap.set_as(Stream, support::ulittle32_t(StreamNo), HashTraits);
 }
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index 84eb4fbbfa631..8f5a048ea4b56 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -34,7 +34,7 @@ using namespace llvm::support;
 
 PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator)
     : Allocator(Allocator), InjectedSourceHashTraits(Strings),
-      InjectedSourceTable(2, InjectedSourceHashTraits) {}
+      InjectedSourceTable(2) {}
 
 PDBFileBuilder::~PDBFileBuilder() {}
 
@@ -189,7 +189,8 @@ Error PDBFileBuilder::finalizeMsfLayout() {
           static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne);
       Entry.CRC = CRC.getCRC();
       StringRef VName = getStringTableBuilder().getStringForId(IS.VNameIndex);
-      InjectedSourceTable.set_as(VName, std::move(Entry));
+      InjectedSourceTable.set_as(VName, std::move(Entry),
+                                 InjectedSourceHashTraits);
     }
 
     uint32_t SrcHeaderBlockSize =
diff --git a/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp b/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp
index 4ebde45ff9a62..5f0695bc4cb22 100644
--- a/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp
+++ b/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp
@@ -27,27 +27,35 @@ using namespace llvm::support;
 
 namespace {
 
-class HashTableInternals : public HashTable<uint32_t> {
+struct IdentityHashTraits {
+  uint32_t hashLookupKey(uint32_t N) const { return N; }
+  uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
+  uint32_t lookupKeyToStorageKey(uint32_t N) { return N; }
+};
+
+template <class T = uint32_t>
+class HashTableInternals : public HashTable<T> {
 public:
-  using HashTable::Buckets;
-  using HashTable::Present;
-  using HashTable::Deleted;
+  using HashTable<T>::Buckets;
+  using HashTable<T>::Present;
+  using HashTable<T>::Deleted;
 };
 }
 
 TEST(HashTableTest, TestSimple) {
-  HashTableInternals Table;
+  HashTableInternals<> Table;
   EXPECT_EQ(0u, Table.size());
   EXPECT_GT(Table.capacity(), 0u);
 
-  Table.set_as(3u, 7);
+  IdentityHashTraits Traits;
+  Table.set_as(3u, 7, Traits);
   EXPECT_EQ(1u, Table.size());
-  ASSERT_NE(Table.end(), Table.find_as(3u));
-  EXPECT_EQ(7u, Table.get(3u));
+  ASSERT_NE(Table.end(), Table.find_as(3u, Traits));
+  EXPECT_EQ(7u, Table.get(3u, Traits));
 }
 
 TEST(HashTableTest, TestCollision) {
-  HashTableInternals Table;
+  HashTableInternals<> Table;
   EXPECT_EQ(0u, Table.size());
   EXPECT_GT(Table.capacity(), 0u);
 
@@ -57,33 +65,35 @@ TEST(HashTableTest, TestCollision) {
   uint32_t N1 = Table.capacity() + 1;
   uint32_t N2 = 2 * N1;
 
-  Table.set_as(N1, 7);
-  Table.set_as(N2, 12);
+  IdentityHashTraits Traits;
+  Table.set_as(N1, 7, Traits);
+  Table.set_as(N2, 12, Traits);
   EXPECT_EQ(2u, Table.size());
-  ASSERT_NE(Table.end(), Table.find_as(N1));
-  ASSERT_NE(Table.end(), Table.find_as(N2));
+  ASSERT_NE(Table.end(), Table.find_as(N1, Traits));
+  ASSERT_NE(Table.end(), Table.find_as(N2, Traits));
 
-  EXPECT_EQ(7u, Table.get(N1));
-  EXPECT_EQ(12u, Table.get(N2));
+  EXPECT_EQ(7u, Table.get(N1, Traits));
+  EXPECT_EQ(12u, Table.get(N2, Traits));
 }
 
 TEST(HashTableTest, TestRemove) {
-  HashTableInternals Table;
+  HashTableInternals<> Table;
   EXPECT_EQ(0u, Table.size());
   EXPECT_GT(Table.capacity(), 0u);
 
-  Table.set_as(1u, 2);
-  Table.set_as(3u, 4);
+  IdentityHashTraits Traits;
+  Table.set_as(1u, 2, Traits);
+  Table.set_as(3u, 4, Traits);
   EXPECT_EQ(2u, Table.size());
-  ASSERT_NE(Table.end(), Table.find_as(1u));
-  ASSERT_NE(Table.end(), Table.find_as(3u));
+  ASSERT_NE(Table.end(), Table.find_as(1u, Traits));
+  ASSERT_NE(Table.end(), Table.find_as(3u, Traits));
 
-  EXPECT_EQ(2u, Table.get(1u));
-  EXPECT_EQ(4u, Table.get(3u));
+  EXPECT_EQ(2u, Table.get(1u, Traits));
+  EXPECT_EQ(4u, Table.get(3u, Traits));
 }
 
 TEST(HashTableTest, TestCollisionAfterMultipleProbes) {
-  HashTableInternals Table;
+  HashTableInternals<> Table;
   EXPECT_EQ(0u, Table.size());
   EXPECT_GT(Table.capacity(), 0u);
 
@@ -94,17 +104,18 @@ TEST(HashTableTest, TestCollisionAfterMultipleProbes) {
   uint32_t N2 = N1 + 1;
   uint32_t N3 = 2 * N1;
 
-  Table.set_as(N1, 7);
-  Table.set_as(N2, 11);
-  Table.set_as(N3, 13);
+  IdentityHashTraits Traits;
+  Table.set_as(N1, 7, Traits);
+  Table.set_as(N2, 11, Traits);
+  Table.set_as(N3, 13, Traits);
   EXPECT_EQ(3u, Table.size());
-  ASSERT_NE(Table.end(), Table.find_as(N1));
-  ASSERT_NE(Table.end(), Table.find_as(N2));
-  ASSERT_NE(Table.end(), Table.find_as(N3));
+  ASSERT_NE(Table.end(), Table.find_as(N1, Traits));
+  ASSERT_NE(Table.end(), Table.find_as(N2, Traits));
+  ASSERT_NE(Table.end(), Table.find_as(N3, Traits));
 
-  EXPECT_EQ(7u, Table.get(N1));
-  EXPECT_EQ(11u, Table.get(N2));
-  EXPECT_EQ(13u, Table.get(N3));
+  EXPECT_EQ(7u, Table.get(N1, Traits));
+  EXPECT_EQ(11u, Table.get(N2, Traits));
+  EXPECT_EQ(13u, Table.get(N3, Traits));
 }
 
 TEST(HashTableTest, Grow) {
@@ -112,24 +123,26 @@ TEST(HashTableTest, Grow) {
   // guaranteed to trigger a grow.  Then verify that the size is the same, the
   // capacity is larger, and all the original items are still in the table.
 
-  HashTableInternals Table;
+  HashTableInternals<> Table;
+  IdentityHashTraits Traits;
   uint32_t OldCapacity = Table.capacity();
   for (uint32_t I = 0; I < OldCapacity; ++I) {
-    Table.set_as(OldCapacity + I * 2 + 1, I * 2 + 3);
+    Table.set_as(OldCapacity + I * 2 + 1, I * 2 + 3, Traits);
   }
   EXPECT_EQ(OldCapacity, Table.size());
   EXPECT_GT(Table.capacity(), OldCapacity);
   for (uint32_t I = 0; I < OldCapacity; ++I) {
-    ASSERT_NE(Table.end(), Table.find_as(OldCapacity + I * 2 + 1));
-    EXPECT_EQ(I * 2 + 3, Table.get(OldCapacity + I * 2 + 1));
+    ASSERT_NE(Table.end(), Table.find_as(OldCapacity + I * 2 + 1, Traits));
+    EXPECT_EQ(I * 2 + 3, Table.get(OldCapacity + I * 2 + 1, Traits));
   }
 }
 
 TEST(HashTableTest, Serialization) {
-  HashTableInternals Table;
+  HashTableInternals<> Table;
+  IdentityHashTraits Traits;
   uint32_t Cap = Table.capacity();
   for (uint32_t I = 0; I < Cap; ++I) {
-    Table.set_as(Cap + I * 2 + 1, I * 2 + 3);
+    Table.set_as(Cap + I * 2 + 1, I * 2 + 3, Traits);
   }
 
   std::vector<uint8_t> Buffer(Table.calculateSerializedLength());
@@ -139,7 +152,7 @@ TEST(HashTableTest, Serialization) {
   // We should have written precisely the number of bytes we calculated earlier.
   EXPECT_EQ(Buffer.size(), Writer.getOffset());
 
-  HashTableInternals Table2;
+  HashTableInternals<> Table2;
   BinaryStreamReader Reader(Stream);
   EXPECT_THAT_ERROR(Table2.load(Reader), Succeeded());
   // We should have read precisely the number of bytes we calculated earlier.
@@ -192,20 +205,19 @@ TEST(HashTableTest, NamedStreamMap) {
   } while (std::next_permutation(Streams.begin(), Streams.end()));
 }
 
-namespace {
 struct FooBar {
   uint32_t X;
   uint32_t Y;
-};
 
-} // namespace
+  bool operator==(const FooBar &RHS) const {
+    return X == RHS.X && Y == RHS.Y;
+  }
+};
 
-namespace llvm {
-namespace pdb {
-template <> struct PdbHashTraits<FooBar> {
+struct FooBarHashTraits {
   std::vector<char> Buffer;
 
-  PdbHashTraits() { Buffer.push_back(0); }
+  FooBarHashTraits() { Buffer.push_back(0); }
 
   uint32_t hashLookupKey(StringRef S) const {
     return llvm::pdb::hashStringV1(S);
@@ -225,17 +237,16 @@ template <> struct PdbHashTraits<FooBar> {
     return N;
   }
 };
-} // namespace pdb
-} // namespace llvm
 
 TEST(HashTableTest, NonTrivialValueType) {
-  HashTable<FooBar> Table;
+  HashTableInternals<FooBar> Table;
+  FooBarHashTraits Traits;
   uint32_t Cap = Table.capacity();
   for (uint32_t I = 0; I < Cap; ++I) {
     FooBar F;
     F.X = I;
     F.Y = I + 1;
-    Table.set_as(utostr(I), F);
+    Table.set_as(utostr(I), F, Traits);
   }
 
   std::vector<uint8_t> Buffer(Table.calculateSerializedLength());
@@ -245,7 +256,7 @@ TEST(HashTableTest, NonTrivialValueType) {
   // We should have written precisely the number of bytes we calculated earlier.
   EXPECT_EQ(Buffer.size(), Writer.getOffset());
 
-  HashTable<FooBar> Table2;
+  HashTableInternals<FooBar> Table2;
   BinaryStreamReader Reader(Stream);
   EXPECT_THAT_ERROR(Table2.load(Reader), Succeeded());
   // We should have read precisely the number of bytes we calculated earlier.
@@ -253,7 +264,7 @@ TEST(HashTableTest, NonTrivialValueType) {
 
   EXPECT_EQ(Table.size(), Table2.size());
   EXPECT_EQ(Table.capacity(), Table2.capacity());
-  // EXPECT_EQ(Table.Buckets, Table2.Buckets);
-  // EXPECT_EQ(Table.Present, Table2.Present);
-  // EXPECT_EQ(Table.Deleted, Table2.Deleted);
+  EXPECT_EQ(Table.Buckets, Table2.Buckets);
+  EXPECT_EQ(Table.Present, Table2.Present);
+  EXPECT_EQ(Table.Deleted, Table2.Deleted);
 }

From 5d9d7c59ee3353675ec89653c3a2b0175305d59a Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Fri, 12 Jul 2019 23:38:31 +0000
Subject: [PATCH 012/451] Re-land [JSONCompilationDatabase] Strip
 distcc/ccache/gomacc wrappers from parsed commands.

Use //net/dir like other test cases for windows compatibility

llvm-svn: 365975
---
 clang/lib/Tooling/JSONCompilationDatabase.cpp | 50 +++++++++++++++++--
 .../Tooling/CompilationDatabaseTest.cpp       | 27 ++++++++++
 2 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Tooling/JSONCompilationDatabase.cpp b/clang/lib/Tooling/JSONCompilationDatabase.cpp
index 76a82b0fd9bd3..f19a0f7550b96 100644
--- a/clang/lib/Tooling/JSONCompilationDatabase.cpp
+++ b/clang/lib/Tooling/JSONCompilationDatabase.cpp
@@ -256,15 +256,57 @@ JSONCompilationDatabase::getAllCompileCommands() const {
   return Commands;
 }
 
+static llvm::StringRef stripExecutableExtension(llvm::StringRef Name) {
+  const llvm::StringRef Exe = ".exe"; // Only this one suffix is stripped.
+  return Name.endswith(Exe) ? Name.drop_back(Exe.size()) : Name;
+}
+
+// Compiler wrappers (ccache, distcc, gomacc) receive the "real" compiler as
+// an argument, e.g. `distcc gcc -O3 foo.c`. Such commands end up in
+// compile_commands.json when people set CC="distcc gcc", and clang's driver
+// doesn't understand them, so one leading wrapper is removed per call.
+// Returns true iff Args was modified (the wrapper in Args[0] was erased).
+static bool unwrapCommand(std::vector<std::string> &Args) {
+  if (Args.size() < 2)
+    return false;
+  StringRef Tool =
+      stripExecutableExtension(llvm::sys::path::filename(Args.front()));
+  const bool IsKnownWrapper =
+      Tool == "distcc" || Tool == "gomacc" || Tool == "ccache";
+  if (!IsKnownWrapper)
+    return false;
+  // Most of these wrappers can be invoked in three ways:
+  //   `distcc g++ file.c`  Explicit compiler. This is the form matched here;
+  //                        the leading `distcc` has to be dropped.
+  //   `distcc file.c`      Implicit compiler (cc or similar). Clang's driver
+  //                        copes with this, so nothing is changed.
+  //   `g++ file.c`         g++ is a symlink to distcc. This case is never
+  //                        even noticed here, and all is well.
+  // Only the first two need telling apart. The wrappers take no flags of
+  // their own, so Args[1] is a compiler flag, an input file, or a compiler.
+  // Inputs have extensions, compilers (ignoring ".exe") do not.
+  const std::string &Next = Args[1];
+  // Flag or input file next: the wrapper itself acts like GCC, as do we.
+  if (Next[0] == '-' ||
+      llvm::sys::path::has_extension(stripExecutableExtension(Next)))
+    return false;
+  Args.erase(Args.begin());
+  return true;
+}
+
 static std::vector<std::string>
 nodeToCommandLine(JSONCommandLineSyntax Syntax,
                   const std::vector<llvm::yaml::ScalarNode *> &Nodes) {
   SmallString<1024> Storage;
-  if (Nodes.size() == 1)
-    return unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage));
   std::vector<std::string> Arguments;
-  for (const auto *Node : Nodes)
-    Arguments.push_back(Node->getValue(Storage));
+  if (Nodes.size() == 1)
+    Arguments = unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage));
+  else
+    for (const auto *Node : Nodes)
+      Arguments.push_back(Node->getValue(Storage));
+  // There may be multiple wrappers: using distcc and ccache together is common.
+  while (unwrapCommand(Arguments))
+    ;
   return Arguments;
 }
 
diff --git a/clang/unittests/Tooling/CompilationDatabaseTest.cpp b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
index da7ae09917dfa..fde95445bdab0 100644
--- a/clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -370,6 +370,33 @@ TEST(findCompileArgsInJsonDatabase, FindsEntry) {
   EXPECT_EQ("command4", FoundCommand.CommandLine[0]) << ErrorMessage;
 }
 
+TEST(findCompileArgsInJsonDatabase, ParsesCompilerWrappers) {
+  // Each case pairs a "command" string stored in the database with the
+  // space-joined command line expected back after wrapper stripping.
+  std::vector<std::pair<std::string, std::string>> Cases = {
+      {"distcc gcc foo.c", "gcc foo.c"},
+      {"gomacc clang++ foo.c", "clang++ foo.c"},
+      {"ccache gcc foo.c", "gcc foo.c"},
+      {"ccache.exe gcc foo.c", "gcc foo.c"},
+      {"ccache g++.exe foo.c", "g++.exe foo.c"},
+      {"ccache distcc gcc foo.c", "gcc foo.c"},
+      // Wrapper followed by an input file or a flag: expected unchanged.
+      {"distcc foo.c", "distcc foo.c"},
+      {"distcc -I/foo/bar foo.c", "distcc -I/foo/bar foo.c"},
+  };
+  std::string ErrorMessage;
+
+  for (const auto &Case : Cases) {
+    std::string DB =
+        R"([{"directory":"//net/dir", "file":"//net/dir/foo.c", "command":")" +
+        Case.first + "\"}]";
+    CompileCommand FoundCommand =
+        findCompileArgsInJsonDatabase("//net/dir/foo.c", DB, ErrorMessage);
+    EXPECT_EQ(Case.second, llvm::join(FoundCommand.CommandLine, " "))
+        << Case.first;
+  }
+}
+
 static std::vector<std::string> unescapeJsonCommandLine(StringRef Command) {
   std::string JsonDatabase =
     ("[{\"directory\":\"//net/root\", \"file\":\"test\", \"command\": \"" +

From 4765aa14ff429db9301047296c9bd8e201bcb3a2 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Sat, 13 Jul 2019 00:09:04 +0000
Subject: [PATCH 013/451] [DirectoryWatcher][test][NFC] Add information to test
 failure reports

llvm-svn: 365976
---
 .../DirectoryWatcher/DirectoryWatcherTest.cpp | 27 +++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
index 0808ff47dee89..72bc86d4493cf 100644
--- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
+++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
@@ -97,7 +97,9 @@ std::string eventKindToString(const DirectoryWatcher::Event::EventKind K) {
 
 struct VerifyingConsumer {
   std::vector<DirectoryWatcher::Event> ExpectedInitial;
+  const std::vector<DirectoryWatcher::Event> ExpectedInitialCopy;
   std::vector<DirectoryWatcher::Event> ExpectedNonInitial;
+  const std::vector<DirectoryWatcher::Event> ExpectedNonInitialCopy;
   std::vector<DirectoryWatcher::Event> OptionalNonInitial;
   std::vector<DirectoryWatcher::Event> UnexpectedInitial;
   std::vector<DirectoryWatcher::Event> UnexpectedNonInitial;
@@ -108,8 +110,8 @@ struct VerifyingConsumer {
       const std::vector<DirectoryWatcher::Event> &ExpectedInitial,
       const std::vector<DirectoryWatcher::Event> &ExpectedNonInitial,
       const std::vector<DirectoryWatcher::Event> &OptionalNonInitial = {})
-      : ExpectedInitial(ExpectedInitial),
-        ExpectedNonInitial(ExpectedNonInitial),
+      : ExpectedInitial(ExpectedInitial), ExpectedInitialCopy(ExpectedInitial),
+        ExpectedNonInitial(ExpectedNonInitial), ExpectedNonInitialCopy(ExpectedNonInitial),
         OptionalNonInitial(OptionalNonInitial) {}
 
   // This method is used by DirectoryWatcher.
@@ -181,6 +183,26 @@ struct VerifyingConsumer {
   }
 
   void printUnmetExpectations(llvm::raw_ostream &OS) {
+    // If there was any issue, print the expected state
+    if (
+      !ExpectedInitial.empty()
+      ||
+      !ExpectedNonInitial.empty()
+      ||
+      !UnexpectedInitial.empty()
+      ||
+      !UnexpectedNonInitial.empty()
+    ) {
+      OS << "Expected initial events: \n";
+      for (const auto &E : ExpectedInitialCopy) {
+        OS << eventKindToString(E.Kind) << " " << E.Filename << "\n";
+      }
+      OS << "Expected non-initial events: \n";
+      for (const auto &E : ExpectedNonInitialCopy) {
+        OS << eventKindToString(E.Kind) << " " << E.Filename << "\n";
+      }
+    }
+
     if (!ExpectedInitial.empty()) {
       OS << "Expected but not seen initial events: \n";
       for (const auto &E : ExpectedInitial) {
@@ -218,6 +240,7 @@ void checkEventualResultWithTimeout(VerifyingConsumer &TestConsumer) {
   EXPECT_TRUE(WaitForExpectedStateResult.wait_for(std::chrono::seconds(3)) ==
               std::future_status::ready)
       << "The expected result state wasn't reached before the time-out.";
+  std::unique_lock<std::mutex> L(TestConsumer.Mtx);
   EXPECT_TRUE(TestConsumer.result().hasValue());
   if (TestConsumer.result().hasValue()) {
     EXPECT_TRUE(*TestConsumer.result());

From de85380fa02506ebb7ebbd46b4eb3d80f5619e38 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Sat, 13 Jul 2019 00:09:27 +0000
Subject: [PATCH 014/451] [Attributor][FIX] Lookup of (call site) argument
 attributes

llvm-svn: 365977
---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 3a8e88bd20ff5..0b72394e8e9ec 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -179,9 +179,12 @@ struct Attributor {
     assert(AAType::ID != Attribute::None &&
            "Cannot lookup generic abstract attributes!");
 
-    // Determine the argument number automatically for llvm::Arguments.
+    // Determine the argument number automatically for llvm::Arguments if none
+    // is set. Do not override a given one as it could be a use of the argument
+    // in a call site.
     if (auto *Arg = dyn_cast<Argument>(&V))
-      ArgNo = Arg->getArgNo();
+      if (ArgNo == -1)
+        ArgNo = Arg->getArgNo();
 
     // If a function was given together with an argument number, perform the
     // lookup for the actual argument instead. Don't do it for variadic

From b016de51e04fbade2b14d74e36b2cfc6edde9394 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Sat, 13 Jul 2019 00:12:22 +0000
Subject: [PATCH 015/451] [DWARFContext] Strip leading dot in section names

The LLVM context doesn't expect the leading dot in the section name.

llvm-svn: 365978
---
 lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp
index 2f693fe5c3243..eb307ce1cce1b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp
@@ -116,6 +116,8 @@ llvm::DWARFContext &DWARFContext::GetAsLLVM() {
 
       llvm::StringRef data = llvm::toStringRef(section_data.GetData());
       llvm::StringRef name = section.GetName().GetStringRef();
+      if (name.startswith("."))
+        name = name.drop_front();
       section_map.try_emplace(
           name, llvm::MemoryBuffer::getMemBuffer(data, name, false));
     };

From 0291d309291f69525fdf61072b6790fd5e1d67bf Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Sat, 13 Jul 2019 00:20:34 +0000
Subject: [PATCH 016/451] [COFF] Add null check in case of symbols defined in
 LTO blobs

The test case could probably be improved further if the failure path was
better understood.

Fixes PR42536

llvm-svn: 365979
---
 lld/COFF/SymbolTable.cpp                      |  2 +-
 .../COFF/Inputs/undefined-symbol-lto-a.ll     | 82 +++++++++++++++++++
 .../COFF/Inputs/undefined-symbol-lto-b.ll     | 29 +++++++
 lld/test/COFF/undefined-symbol-lto.test       | 30 +++++++
 4 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
 create mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
 create mode 100644 lld/test/COFF/undefined-symbol-lto.test

diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 280a9c28892c8..2173c10c1ca56 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -69,7 +69,7 @@ static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
 
   for (Symbol *s : sc->file->getSymbols()) {
     auto *d = dyn_cast_or_null<DefinedRegular>(s);
-    if (!d || d->getChunk() != sc || d->getValue() > addr ||
+    if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr ||
         (candidate && d->getValue() < candidate->getValue()))
       continue;
 
diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
new file mode 100644
index 0000000000000..6793ec718e806
--- /dev/null
+++ b/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
@@ -0,0 +1,82 @@
+; ModuleID = 't.obj'
+source_filename = "t.cpp"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.21.27702"
+
+%struct.Init = type { %struct.S }
+%struct.S = type { i32 (...)** }
+%rtti.CompleteObjectLocator = type { i32, i32, i32, i32, i32, i32 }
+%rtti.TypeDescriptor7 = type { i8**, i8*, [8 x i8] }
+%rtti.ClassHierarchyDescriptor = type { i32, i32, i32, i32 }
+%rtti.BaseClassDescriptor = type { i32, i32, i32, i32, i32, i32, i32 }
+
+$"??_SS@@6B@" = comdat largest
+
+$"??_R4S@@6B@" = comdat any
+
+$"??_R0?AUS@@@8" = comdat any
+
+$"??_R3S@@8" = comdat any
+
+$"??_R2S@@8" = comdat any
+
+$"??_R1A@?0A@EA@S@@8" = comdat any
+
+@"?d@@3UInit@@A" = dso_local local_unnamed_addr global %struct.Init zeroinitializer, align 8
+@anon.bcb2691509de99310dddb690fcdb4cdc.0 = private unnamed_addr constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i8*), i8* bitcast (void (%struct.S*)* @"?foo@S@@UEAAXXZ" to i8*)] }, comdat($"??_SS@@6B@"), !type !0
+@"??_R4S@@6B@" = linkonce_odr constant %rtti.CompleteObjectLocator { i32 1, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
+@"??_7type_info@@6B@" = external constant i8*
+@"??_R0?AUS@@@8" = linkonce_odr global %rtti.TypeDescriptor7 { i8** @"??_7type_info@@6B@", i8* null, [8 x i8] c".?AUS@@\00" }, comdat
+@__ImageBase = external dso_local constant i8
+@"??_R3S@@8" = linkonce_odr constant %rtti.ClassHierarchyDescriptor { i32 0, i32 0, i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint ([2 x i32]* @"??_R2S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
+@"??_R2S@@8" = linkonce_odr constant [2 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.BaseClassDescriptor* @"??_R1A@?0A@EA@S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0], comdat
+@"??_R1A@?0A@EA@S@@8" = linkonce_odr constant %rtti.BaseClassDescriptor { i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 0, i32 -1, i32 0, i32 64, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_t.cpp, i8* null }]
+
+@"??_SS@@6B@" = unnamed_addr alias i8*, getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @anon.bcb2691509de99310dddb690fcdb4cdc.0, i32 0, i32 0, i32 1)
+
+declare dso_local void @"?undefined_ref@@YAXXZ"() local_unnamed_addr #0
+
+declare dllimport void @"?foo@S@@UEAAXXZ"(%struct.S*) unnamed_addr #0
+
+; Function Attrs: nounwind sspstrong uwtable
+define internal void @_GLOBAL__sub_I_t.cpp() #1 {
+entry:
+  store i32 (...)** bitcast (i8** @"??_SS@@6B@" to i32 (...)**), i32 (...)*** getelementptr inbounds (%struct.Init, %struct.Init* @"?d@@3UInit@@A", i64 0, i32 0, i32 0), align 8
+  tail call void @"?undefined_ref@@YAXXZ"() #2
+  ret void
+}
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.linker.options = !{!1, !2}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = !{i64 8, !"?AUS@@"}
+!1 = !{!"/DEFAULTLIB:libcmt.lib"}
+!2 = !{!"/DEFAULTLIB:oldnames.lib"}
+!3 = !{i32 1, !"wchar_size", i32 2}
+!4 = !{i32 7, !"PIC Level", i32 2}
+!5 = !{i32 1, !"ThinLTO", i32 0}
+!6 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
+!7 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"}
+
+^0 = module: (path: "t.obj", hash: (0, 0, 0, 0, 0))
+^1 = gv: (name: "__ImageBase") ; guid = 434928772013489304
+^2 = gv: (name: "??_R2S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^6)))) ; guid = 2160898732728284029
+^3 = gv: (name: "llvm.global_ctors", summaries: (variable: (module: ^0, flags: (linkage: appending, notEligibleToImport: 1, live: 1, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^14)))) ; guid = 2412314959268824392
+^4 = gv: (name: "?foo@S@@UEAAXXZ") ; guid = 6578172636330484861
+^5 = gv: (name: "??_SS@@6B@", summaries: (alias: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), aliasee: ^10))) ; guid = 8774897714842691026
+^6 = gv: (name: "??_R1A@?0A@EA@S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^11, ^1, ^8)))) ; guid = 9397802696236423453
+^7 = gv: (name: "?undefined_ref@@YAXXZ") ; guid = 9774674600202276560
+^8 = gv: (name: "??_R3S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^2)))) ; guid = 10685958509605791599
+^9 = gv: (name: "??_7type_info@@6B@") ; guid = 10826752452437539368
+^10 = gv: (name: "anon.bcb2691509de99310dddb690fcdb4cdc.0", summaries: (variable: (module: ^0, flags: (linkage: private, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), vTableFuncs: ((virtFunc: ^4, offset: 8)), refs: (^13, ^4)))) ; guid = 11510395461204283992
+^11 = gv: (name: "??_R0?AUS@@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^9)))) ; guid = 12346607659584231960
+^12 = gv: (name: "?d@@3UInit@@A", summaries: (variable: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 1)))) ; guid = 14563354643524156382
+^13 = gv: (name: "??_R4S@@6B@", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^13, ^11, ^1, ^8)))) ; guid = 14703528065171087394
+^14 = gv: (name: "_GLOBAL__sub_I_t.cpp", summaries: (function: (module: ^0, flags: (linkage: internal, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 3, calls: ((callee: ^7)), refs: (^12, ^5)))) ; guid = 15085897428757412588
+^15 = typeidCompatibleVTable: (name: "?AUS@@", summary: ((offset: 8, ^10))) ; guid = 13986515119763165370
diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
new file mode 100644
index 0000000000000..ff73e7c6ba680
--- /dev/null
+++ b/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
@@ -0,0 +1,29 @@
+; ModuleID = 'b.obj'
+source_filename = "b.cpp"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.21.27702"
+
+%struct.S = type { i32 (...)** }
+
+; Function Attrs: norecurse nounwind readnone sspstrong uwtable
+define dso_local void @"?foo@S@@UEAAXXZ"(%struct.S* nocapture %this) unnamed_addr #0 align 2 {
+entry:
+  ret void
+}
+
+attributes #0 = { norecurse nounwind readnone sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.linker.options = !{!0, !1}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = !{!"/DEFAULTLIB:libcmt.lib"}
+!1 = !{!"/DEFAULTLIB:oldnames.lib"}
+!2 = !{i32 1, !"wchar_size", i32 2}
+!3 = !{i32 7, !"PIC Level", i32 2}
+!4 = !{i32 1, !"ThinLTO", i32 0}
+!5 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
+!6 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"}
+
+^0 = module: (path: "b.obj", hash: (0, 0, 0, 0, 0))
+^1 = gv: (name: "?foo@S@@UEAAXXZ", summaries: (function: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0)))) ; guid = 6578172636330484861
diff --git a/lld/test/COFF/undefined-symbol-lto.test b/lld/test/COFF/undefined-symbol-lto.test
new file mode 100644
index 0000000000000..6911b121122a4
--- /dev/null
+++ b/lld/test/COFF/undefined-symbol-lto.test
@@ -0,0 +1,30 @@
+RUN: rm -rf %t && mkdir -p %t && cd %t
+RUN: llvm-as %S/Inputs/undefined-symbol-lto-a.ll -o t.obj
+RUN: llvm-as %S/Inputs/undefined-symbol-lto-b.ll -o b.obj
+RUN: llvm-lib b.obj -out:b.lib
+RUN: not lld-link t.obj b.lib -subsystem:console 2>&1 | FileCheck %s
+
+CHECK: undefined symbol: main
+CHECK: referenced by
+CHECK: undefined symbol: void __cdecl undefined_ref(void)
+CHECK: referenced by
+
+Originally reported as PR42536.
+
+Inputs/undefined-symbol-lto-a.ll corresponds to this C++:
+
+struct __declspec(dllimport) S {
+  virtual void foo();
+};
+void undefined_ref();
+struct Init {
+  Init() { undefined_ref(); }
+  S c;
+} d;
+
+Inputs/undefined-symbol-lto-b.ll corresponds to this C++:
+
+struct S {
+  virtual void foo();
+};
+void S::foo() {}

From 41c22b4390c763f6fc36ec984f3786d465c434b5 Mon Sep 17 00:00:00 2001
From: Evgeniy Stepanov <eugeni.stepanov@gmail.com>
Date: Sat, 13 Jul 2019 00:29:03 +0000
Subject: [PATCH 017/451] Extend function attributes bitset size from 64 to 96.

Summary: We are going to add a function attribute number 64.

Reviewers: pcc, jdoerfert, lebedev.ri

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64663

llvm-svn: 365980
---
 llvm/lib/IR/AttributeImpl.h | 12 ++++++------
 llvm/lib/IR/Attributes.cpp  | 17 ++++++++++++-----
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index f6898476382dd..f989fa3b910e6 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -179,9 +179,9 @@ class AttributeSetNode final
       private TrailingObjects<AttributeSetNode, Attribute> {
   friend TrailingObjects;
 
-  /// Bitset with a bit for each available attribute Attribute::AttrKind.
-  uint64_t AvailableAttrs;
   unsigned NumAttrs; ///< Number of attributes in this node.
+  /// Bitset with a bit for each available attribute Attribute::AttrKind.
+  uint8_t AvailableAttrs[12] = {};
 
   AttributeSetNode(ArrayRef<Attribute> Attrs);
 
@@ -200,7 +200,7 @@ class AttributeSetNode final
   unsigned getNumAttributes() const { return NumAttrs; }
 
   bool hasAttribute(Attribute::AttrKind Kind) const {
-    return AvailableAttrs & ((uint64_t)1) << Kind;
+    return AvailableAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
   }
   bool hasAttribute(StringRef Kind) const;
   bool hasAttributes() const { return NumAttrs != 0; }
@@ -244,10 +244,10 @@ class AttributeListImpl final
   friend TrailingObjects;
 
 private:
-  /// Bitset with a bit for each available attribute Attribute::AttrKind.
-  uint64_t AvailableFunctionAttrs;
   LLVMContext &Context;
   unsigned NumAttrSets; ///< Number of entries in this set.
+  /// Bitset with a bit for each available attribute Attribute::AttrKind.
+  uint8_t AvailableFunctionAttrs[12] = {};
 
   // Helper fn for TrailingObjects class.
   size_t numTrailingObjects(OverloadToken<AttributeSet>) { return NumAttrSets; }
@@ -267,7 +267,7 @@ class AttributeListImpl final
   /// Return true if the AttributeSet or the FunctionIndex has an
   /// enum attribute of the given kind.
   bool hasFnAttribute(Attribute::AttrKind Kind) const {
-    return AvailableFunctionAttrs & ((uint64_t)1) << Kind;
+    return AvailableFunctionAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
   }
 
   using iterator = const AttributeSet *;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 90b3c22e80f01..1ba703bb14c76 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -718,13 +718,18 @@ LLVM_DUMP_METHOD void AttributeSet::dump() const {
 //===----------------------------------------------------------------------===//
 
 AttributeSetNode::AttributeSetNode(ArrayRef<Attribute> Attrs)
-    : AvailableAttrs(0), NumAttrs(Attrs.size()) {
+    : NumAttrs(Attrs.size()) {
   // There's memory after the node where we can store the entries in.
   llvm::copy(Attrs, getTrailingObjects<Attribute>());
 
+  static_assert(Attribute::EndAttrKinds <=
+                    sizeof(AvailableAttrs) * CHAR_BIT,
+                "Too many attributes");
+
   for (const auto I : *this) {
     if (!I.isStringAttribute()) {
-      AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
+      Attribute::AttrKind Kind = I.getKindAsEnum();
+      AvailableAttrs[Kind / 8] |= 1ULL << (Kind % 8);
     }
   }
 }
@@ -896,7 +901,7 @@ static constexpr unsigned attrIdxToArrayIdx(unsigned Index) {
 
 AttributeListImpl::AttributeListImpl(LLVMContext &C,
                                      ArrayRef<AttributeSet> Sets)
-    : AvailableFunctionAttrs(0), Context(C), NumAttrSets(Sets.size()) {
+    : Context(C), NumAttrSets(Sets.size()) {
   assert(!Sets.empty() && "pointless AttributeListImpl");
 
   // There's memory after the node where we can store the entries in.
@@ -909,8 +914,10 @@ AttributeListImpl::AttributeListImpl(LLVMContext &C,
   static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U,
                 "function should be stored in slot 0");
   for (const auto I : Sets[0]) {
-    if (!I.isStringAttribute())
-      AvailableFunctionAttrs |= 1ULL << I.getKindAsEnum();
+    if (!I.isStringAttribute()) {
+      Attribute::AttrKind Kind = I.getKindAsEnum();
+      AvailableFunctionAttrs[Kind / 8] |= 1ULL << (Kind % 8);
+    }
   }
 }
 

From cafb5d24dfa0faf3fa7dfb7eefd7df08aeb01e55 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sat, 13 Jul 2019 00:47:58 +0000
Subject: [PATCH 018/451] clang/test/Driver/fsanitize.c: Fix -fsanitize=vptr
 using default target

The default implementation of getSupportedSanitizers isn't able to turn
on the vptr sanitizer, and thus, any platform that runs this test will
fail with the error:

    clang: error: unsupported option '-fsanitize=vptr' for target '<target>'

Patch by James Nagurne!

llvm-svn: 365981
---
 clang/test/Driver/fsanitize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c
index 187d4bfd4301e..a275b576688e6 100644
--- a/clang/test/Driver/fsanitize.c
+++ b/clang/test/Driver/fsanitize.c
@@ -97,7 +97,7 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=vptr -fsanitize-undefined-trap-on-error %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-VPTR-TRAP-UNDEF
 // CHECK-VPTR-TRAP-UNDEF: error: invalid argument '-fsanitize=vptr' not allowed with '-fsanitize-trap=undefined'
 
-// RUN: %clang -fsanitize=vptr -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-VPTR-NO-RTTI
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=vptr -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-VPTR-NO-RTTI
 // CHECK-VPTR-NO-RTTI: '-fsanitize=vptr' not allowed with '-fno-rtti'
 
 // RUN: %clang -fsanitize=undefined -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-NO-RTTI

From 4f519b6919d2e6e1fb164b87a0d1a5c390581cbe Mon Sep 17 00:00:00 2001
From: Julian Lettner <jlettner@apple.com>
Date: Sat, 13 Jul 2019 00:55:06 +0000
Subject: [PATCH 019/451] [TSan] Tiny cleanup of UnmangleLongJmpSp for
 Linux/x86_64

NFC.

llvm-svn: 365982
---
 compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
index 5d9284c525084..0f23da0e877fe 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
@@ -373,9 +373,7 @@ int ExtractRecvmsgFDs(void *msgp, int *fds, int nfd) {
 // Reverse operation of libc stack pointer mangling
 static uptr UnmangleLongJmpSp(uptr mangled_sp) {
 #if defined(__x86_64__)
-# if SANITIZER_FREEBSD || SANITIZER_NETBSD
-  return mangled_sp;
-# else  // Linux
+# if SANITIZER_LINUX
   // Reverse of:
   //   xor  %fs:0x30, %rsi
   //   rol  $0x11, %rsi
@@ -385,6 +383,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
       : "=r" (sp)
       : "0" (mangled_sp));
   return sp;
+# else
+  return mangled_sp;
 # endif
 #elif defined(__aarch64__)
 # if SANITIZER_LINUX
@@ -394,11 +394,11 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) {
 # endif
 #elif defined(__powerpc64__)
   // Reverse of:
-  //  ld   r4, -28696(r13)
-  //  xor  r4, r3, r4
-  uptr xor_guard;
-  asm("ld  %0, -28696(%%r13) \n" : "=r" (xor_guard));
-  return mangled_sp ^ xor_guard;
+  //   ld   r4, -28696(r13)
+  //   xor  r4, r3, r4
+  uptr xor_key;
+  asm("ld  %0, -28696(%%r13)" : "=r" (xor_key));
+  return mangled_sp ^ xor_key;
 #elif defined(__mips__)
   return mangled_sp;
 #else

From 0a7f4cdce9fa07f9027181b19db59b5231932487 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Sat, 13 Jul 2019 01:09:21 +0000
Subject: [PATCH 020/451] [Attributor] Only return attributes with a valid
 state

Attributor::getAAFor will now only return AbstractAttributes with a
valid AbstractState. This simplifies call sites as they only need to
check if the returned pointer is non-null. It also reduces the potential
for accidental misuse.

llvm-svn: 365983
---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 8 ++++++--
 llvm/lib/Transforms/IPO/Attributor.cpp        | 5 ++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 0b72394e8e9ec..435aaca75d1c2 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -199,8 +199,12 @@ struct Attributor {
     const auto &KindToAbstractAttributeMap = AAMap.lookup({&V, ArgNo});
     if (AAType *AA = static_cast<AAType *>(
             KindToAbstractAttributeMap.lookup(AAType::ID))) {
-      QueryMap[AA].insert(&QueryingAA);
-      return AA;
+      // Do not return an attribute with an invalid state. This minimizes checks
+      // at the call sites and allows the fallback below to kick in.
+      if (AA->getState().isValidState()) {
+        QueryMap[AA].insert(&QueryingAA);
+        return AA;
+      }
     }
 
     // If no abstract attribute was found and we look for a call site argument,
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 85aa7a63ada57..5fea3d0b87a09 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -655,7 +655,7 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
 
     // Try to find a assumed unique return value for the called function.
     auto *RetCSAA = A.getAAFor<AAReturnedValuesImpl>(*this, *RV);
-    if (!RetCSAA || !RetCSAA->isValidState()) {
+    if (!RetCSAA) {
       HasOverdefinedReturnedCalls = true;
       LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV
                         << ") with " << (RetCSAA ? "invalid" : "no")
@@ -965,8 +965,7 @@ ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) {
       auto ICS = ImmutableCallSite(I);
       auto *NoFreeAA = A.getAAFor<AANoFreeFunction>(*this, *I);
 
-      if ((!NoFreeAA || !NoFreeAA->isValidState() ||
-           !NoFreeAA->isAssumedNoFree()) &&
+      if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) &&
           !ICS.hasFnAttr(Attribute::NoFree)) {
         indicatePessimisticFixpoint();
         return ChangeStatus::CHANGED;

From c7a1db329849b3a5763545a274ed9c91c592553b Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Sat, 13 Jul 2019 01:09:27 +0000
Subject: [PATCH 021/451] [Attributor][NFC] Run clang-format on the attributor
 files (.h/.cpp)

The Attributor files are kept formatted with clang-format; we should try
to keep this state.

llvm-svn: 365984
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  4 +---
 llvm/lib/Transforms/IPO/Attributor.cpp        | 19 +++++++------------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 435aaca75d1c2..88b6af3abbd3d 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -693,9 +693,7 @@ struct AANoSync : public AbstractAttribute {
       : AbstractAttribute(V, InfoCache) {}
 
   /// See AbstractAttribute::getAttrKind().
-  Attribute::AttrKind getAttrKind() const override {
-    return ID;
-  }
+  Attribute::AttrKind getAttrKind() const override { return ID; }
 
   static constexpr Attribute::AttrKind ID =
       Attribute::AttrKind(Attribute::NoSync);
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 5fea3d0b87a09..5a72865db9d0f 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -359,9 +359,7 @@ struct AANoUnwindFunction : AANoUnwind, BooleanState {
   /// }
 
   /// See AbstractAttribute::getManifestPosition().
-  ManifestPosition getManifestPosition() const override {
-    return MP_FUNCTION;
-  }
+  ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
 
   const std::string getAsStr() const override {
     return getAssumed() ? "nounwind" : "may-unwind";
@@ -500,9 +498,7 @@ class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState {
   const AbstractState &getState() const override { return *this; }
 
   /// See AbstractAttribute::getManifestPosition().
-  ManifestPosition getManifestPosition() const override {
-    return MP_ARGUMENT;
-  }
+  ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
 
   /// See AbstractAttribute::updateImpl(Attributor &A).
   ChangeStatus updateImpl(Attributor &A) override;
@@ -742,9 +738,7 @@ struct AANoSyncFunction : AANoSync, BooleanState {
   /// }
 
   /// See AbstractAttribute::getManifestPosition().
-  ManifestPosition getManifestPosition() const override {
-    return MP_FUNCTION;
-  }
+  ManifestPosition getManifestPosition() const override { return MP_FUNCTION; }
 
   const std::string getAsStr() const override {
     return getAssumed() ? "nosync" : "may-sync";
@@ -767,7 +761,8 @@ struct AANoSyncFunction : AANoSync, BooleanState {
   /// Helper function used to determine whether an instruction is volatile.
   static bool isVolatile(Instruction *I);
 
-  /// Helper function uset to check if intrinsic is volatile (memcpy, memmove, memset).
+  /// Helper function used to check if intrinsic is volatile (memcpy, memmove,
+  /// memset).
   static bool isNoSyncIntrinsic(Instruction *I);
 };
 
@@ -870,7 +865,7 @@ ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) {
     auto *NoSyncAA = A.getAAFor<AANoSyncFunction>(*this, *I);
 
     if (isa<IntrinsicInst>(I) && isNoSyncIntrinsic(I))
-        continue;
+      continue;
 
     if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) &&
         !ICS.hasFnAttr(Attribute::NoSync)) {
@@ -878,7 +873,7 @@ ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) {
       return ChangeStatus::CHANGED;
     }
 
-    if(ICS)
+    if (ICS)
       continue;
 
     if (!isVolatile(I) && !isNonRelaxedAtomic(I))

From 81b03d4a08b16217669fcccb96e7cc436ab3d74a Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanaka@apple.com>
Date: Sat, 13 Jul 2019 01:47:15 +0000
Subject: [PATCH 022/451] [Sema] Diagnose default-initialization, destruction,
 and copying of non-trivial C union types

This patch diagnoses uses of non-trivial C unions and structs/unions
containing non-trivial C unions in the following contexts, which require
default-initialization, destruction, or copying of the union objects,
instead of disallowing fields of non-trivial types in C unions, which is
what we currently do:

- function parameters.
- function returns.
- assignments.
- compound literals.
- block captures except capturing of `__block` variables by non-escaping
  blocks.
- local and global variable definitions.
- lvalue-to-rvalue conversions of volatile types.

See the discussion in https://reviews.llvm.org/D62988 for more background.

rdar://problem/50679094

Differential Revision: https://reviews.llvm.org/D63753

llvm-svn: 365985
---
 clang/include/clang/AST/Decl.h                |  24 ++
 clang/include/clang/AST/DeclBase.h            |   9 +-
 clang/include/clang/AST/Type.h                |  45 ++-
 .../clang/Basic/DiagnosticSemaKinds.td        |  19 +-
 clang/include/clang/Sema/Sema.h               |  42 +++
 clang/lib/AST/Type.cpp                        |  60 +---
 clang/lib/Sema/Sema.cpp                       |  18 +-
 clang/lib/Sema/SemaDecl.cpp                   | 329 ++++++++++++++++--
 clang/lib/Sema/SemaExpr.cpp                   |  33 +-
 clang/lib/Sema/SemaType.cpp                   |   5 +
 clang/lib/Serialization/ASTReaderDecl.cpp     |   3 +
 clang/lib/Serialization/ASTWriterDecl.cpp     |   9 +
 .../test/CodeGenObjC/Inputs/strong_in_union.h |  10 -
 clang/test/CodeGenObjC/strong-in-c-struct.m   |  15 +-
 clang/test/PCH/non-trivial-c-union.m          |  24 ++
 clang/test/SemaObjC/arc-decls.m               |   6 +-
 clang/test/SemaObjC/non-trivial-c-union.m     |  82 +++++
 17 files changed, 611 insertions(+), 122 deletions(-)
 delete mode 100644 clang/test/CodeGenObjC/Inputs/strong_in_union.h
 create mode 100644 clang/test/PCH/non-trivial-c-union.m
 create mode 100644 clang/test/SemaObjC/non-trivial-c-union.m

diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index e593dafb5fc4d..02742801f37c2 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -3746,6 +3746,30 @@ class RecordDecl : public TagDecl {
     RecordDeclBits.NonTrivialToPrimitiveDestroy = V;
   }
 
+  bool hasNonTrivialToPrimitiveDefaultInitializeCUnion() const {
+    return RecordDeclBits.HasNonTrivialToPrimitiveDefaultInitializeCUnion;
+  }
+
+  void setHasNonTrivialToPrimitiveDefaultInitializeCUnion(bool V) {
+    RecordDeclBits.HasNonTrivialToPrimitiveDefaultInitializeCUnion = V;
+  }
+
+  bool hasNonTrivialToPrimitiveDestructCUnion() const {
+    return RecordDeclBits.HasNonTrivialToPrimitiveDestructCUnion;
+  }
+
+  void setHasNonTrivialToPrimitiveDestructCUnion(bool V) {
+    RecordDeclBits.HasNonTrivialToPrimitiveDestructCUnion = V;
+  }
+
+  bool hasNonTrivialToPrimitiveCopyCUnion() const {
+    return RecordDeclBits.HasNonTrivialToPrimitiveCopyCUnion;
+  }
+
+  void setHasNonTrivialToPrimitiveCopyCUnion(bool V) {
+    RecordDeclBits.HasNonTrivialToPrimitiveCopyCUnion = V;
+  }
+
   /// Determine whether this class can be passed in registers. In C++ mode,
   /// it must have at least one trivial, non-deleted copy or move constructor.
   /// FIXME: This should be set as part of completeDefinition.
diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 26edb7790c261..d64d0cb425db0 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -1440,6 +1440,13 @@ class DeclContext {
     uint64_t NonTrivialToPrimitiveCopy : 1;
     uint64_t NonTrivialToPrimitiveDestroy : 1;
 
+    /// The following bits indicate whether this is or contains a C union that
+    /// is non-trivial to default-initialize, destruct, or copy. These bits
+    /// imply the associated basic non-triviality predicates declared above.
+    uint64_t HasNonTrivialToPrimitiveDefaultInitializeCUnion : 1;
+    uint64_t HasNonTrivialToPrimitiveDestructCUnion : 1;
+    uint64_t HasNonTrivialToPrimitiveCopyCUnion : 1;
+
     /// Indicates whether this struct is destroyed in the callee.
     uint64_t ParamDestroyedInCallee : 1;
 
@@ -1448,7 +1455,7 @@ class DeclContext {
   };
 
   /// Number of non-inherited bits in RecordDeclBitfields.
-  enum { NumRecordDeclBits = 11 };
+  enum { NumRecordDeclBits = 14 };
 
   /// Stores the bits used by OMPDeclareReductionDecl.
   /// If modified NumOMPDeclareReductionDeclBits and the accessor
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 00a2b7643370a..584655fe789e6 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1129,12 +1129,6 @@ class QualType {
     PCK_Struct
   };
 
-  /// Check if this is a non-trivial type that would cause a C struct
-  /// transitively containing this type to be non-trivial. This function can be
-  /// used to determine whether a field of this type can be declared inside a C
-  /// union.
-  bool isNonTrivialPrimitiveCType(const ASTContext &Ctx) const;
-
   /// Check if this is a non-trivial type that would cause a C struct
   /// transitively containing this type to be non-trivial to copy and return the
   /// kind.
@@ -1164,6 +1158,22 @@ class QualType {
     return isDestructedTypeImpl(*this);
   }
 
+  /// Check if this is or contains a C union that is non-trivial to
+  /// default-initialize, which is a union that has a member that is non-trivial
+  /// to default-initialize. If this returns true,
+  /// isNonTrivialToPrimitiveDefaultInitialize returns PDIK_Struct.
+  bool hasNonTrivialToPrimitiveDefaultInitializeCUnion() const;
+
+  /// Check if this is or contains a C union that is non-trivial to destruct,
+  /// which is a union that has a member that is non-trivial to destruct. If
+  /// this returns true, isDestructedType returns DK_nontrivial_c_struct.
+  bool hasNonTrivialToPrimitiveDestructCUnion() const;
+
+  /// Check if this is or contains a C union that is non-trivial to copy, which
+  /// is a union that has a member that is non-trivial to copy. If this returns
+  /// true, isNonTrivialToPrimitiveCopy returns PCK_Struct.
+  bool hasNonTrivialToPrimitiveCopyCUnion() const;
+
   /// Determine whether expressions of the given type are forbidden
   /// from being lvalues in C.
   ///
@@ -1236,6 +1246,11 @@ class QualType {
                                                  const ASTContext &C);
   static QualType IgnoreParens(QualType T);
   static DestructionKind isDestructedTypeImpl(QualType type);
+
+  /// Check if \p RD is or contains a non-trivial C union.
+  static bool hasNonTrivialToPrimitiveDefaultInitializeCUnion(const RecordDecl *RD);
+  static bool hasNonTrivialToPrimitiveDestructCUnion(const RecordDecl *RD);
+  static bool hasNonTrivialToPrimitiveCopyCUnion(const RecordDecl *RD);
 };
 
 } // namespace clang
@@ -6249,6 +6264,24 @@ inline Qualifiers::GC QualType::getObjCGCAttr() const {
   return getQualifiers().getObjCGCAttr();
 }
 
+inline bool QualType::hasNonTrivialToPrimitiveDefaultInitializeCUnion() const {
+  if (auto *RD = getTypePtr()->getBaseElementTypeUnsafe()->getAsRecordDecl())
+    return hasNonTrivialToPrimitiveDefaultInitializeCUnion(RD);
+  return false;
+}
+
+inline bool QualType::hasNonTrivialToPrimitiveDestructCUnion() const {
+  if (auto *RD = getTypePtr()->getBaseElementTypeUnsafe()->getAsRecordDecl())
+    return hasNonTrivialToPrimitiveDestructCUnion(RD);
+  return false;
+}
+
+inline bool QualType::hasNonTrivialToPrimitiveCopyCUnion() const {
+  if (auto *RD = getTypePtr()->getBaseElementTypeUnsafe()->getAsRecordDecl())
+    return hasNonTrivialToPrimitiveCopyCUnion(RD);
+  return false;
+}
+
 inline FunctionType::ExtInfo getFunctionExtInfo(const Type &t) {
   if (const auto *PT = t.getAs<PointerType>()) {
     if (const auto *FT = PT->getPointeeType()->getAs<FunctionType>())
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 380db32ba4bf5..c68271b784da1 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -620,8 +620,23 @@ def warn_cstruct_memaccess : Warning<
   InGroup<NonTrivialMemaccess>;
 def note_nontrivial_field : Note<
   "field is non-trivial to %select{copy|default-initialize}0">;
-def err_nontrivial_primitive_type_in_union : Error<
-  "non-trivial C types are disallowed in union">;
+def err_non_trivial_c_union_in_invalid_context : Error<
+  "cannot %select{"
+  "use type %1 for a function/method parameter|"
+  "use type %1 for function/method return|"
+  "default-initialize an object of type %1|"
+  "declare an automatic variable of type %1|"
+  "copy-initialize an object of type %1|"
+  "assign to a variable of type %1|"
+  "construct an automatic compound literal of type %1|"
+  "capture a variable of type %1|"
+  "use volatile type %1 where it causes an lvalue-to-rvalue conversion"
+  "}3 "
+  "since it %select{contains|is}2 a union that is non-trivial to "
+  "%select{default-initialize|destruct|copy}0">;
+def note_non_trivial_c_union : Note<
+  "%select{%2 has subobjects that are|%3 has type %2 that is}0 "
+  "non-trivial to %select{default-initialize|destruct|copy}1">;
 def warn_dyn_class_memaccess : Warning<
   "%select{destination for|source of|first operand of|second operand of}0 this "
   "%1 call is a pointer to %select{|class containing a }2dynamic class %3; "
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 8f66cda46b65c..af762f74d745c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -2114,6 +2114,48 @@ class Sema {
   bool SetParamDefaultArgument(ParmVarDecl *Param, Expr *DefaultArg,
                                SourceLocation EqualLoc);
 
+  // Contexts where using non-trivial C union types can be disallowed. This is
+  // passed to err_non_trivial_c_union_in_invalid_context.
+  enum NonTrivialCUnionContext {
+    // Function parameter.
+    NTCUC_FunctionParam,
+    // Function return.
+    NTCUC_FunctionReturn,
+    // Default-initialized object.
+    NTCUC_DefaultInitializedObject,
+    // Variable with automatic storage duration.
+    NTCUC_AutoVar,
+    // Initializer expression that might copy from another object.
+    NTCUC_CopyInit,
+    // Assignment.
+    NTCUC_Assignment,
+    // Compound literal.
+    NTCUC_CompoundLiteral,
+    // Block capture.
+    NTCUC_BlockCapture,
+    // lvalue-to-rvalue conversion of volatile type.
+    NTCUC_LValueToRValueVolatile,
+  };
+
+  /// Emit diagnostics if the initializer or any of its explicit or
+  /// implicitly-generated subexpressions require copying or
+  /// default-initializing a type that is or contains a C union type that is
+  /// non-trivial to copy or default-initialize.
+  void checkNonTrivialCUnionInInitializer(const Expr *Init, SourceLocation Loc);
+
+  // These flags are passed to checkNonTrivialCUnion.
+  enum NonTrivialCUnionKind {
+    NTCUK_Init = 0x1,
+    NTCUK_Destruct = 0x2,
+    NTCUK_Copy = 0x4,
+  };
+
+  /// Emit diagnostics if a non-trivial C union type or a struct that contains
+  /// a non-trivial C union is used in an invalid context.
+  void checkNonTrivialCUnion(QualType QT, SourceLocation Loc,
+                             NonTrivialCUnionContext UseContext,
+                             unsigned NonTrivialKind);
+
   void AddInitializerToDecl(Decl *dcl, Expr *init, bool DirectInit);
   void ActOnUninitializedDecl(Decl *dcl);
   void ActOnInitializerError(Decl *Dcl);
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 733ca232dd037..01e93c11aad87 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2276,60 +2276,16 @@ bool QualType::isNonWeakInMRRWithObjCWeak(const ASTContext &Context) const {
          getObjCLifetime() != Qualifiers::OCL_Weak;
 }
 
-namespace {
-// Helper class that determines whether this is a type that is non-trivial to
-// primitive copy or move, or is a struct type that has a field of such type.
-template <bool IsMove>
-struct IsNonTrivialCopyMoveVisitor
-    : CopiedTypeVisitor<IsNonTrivialCopyMoveVisitor<IsMove>, IsMove, bool> {
-  using Super =
-      CopiedTypeVisitor<IsNonTrivialCopyMoveVisitor<IsMove>, IsMove, bool>;
-  IsNonTrivialCopyMoveVisitor(const ASTContext &C) : Ctx(C) {}
-  void preVisit(QualType::PrimitiveCopyKind PCK, QualType QT) {}
-
-  bool visitWithKind(QualType::PrimitiveCopyKind PCK, QualType QT) {
-    if (const auto *AT = this->Ctx.getAsArrayType(QT))
-      return this->asDerived().visit(Ctx.getBaseElementType(AT));
-    return Super::visitWithKind(PCK, QT);
-  }
-
-  bool visitARCStrong(QualType QT) { return true; }
-  bool visitARCWeak(QualType QT) { return true; }
-  bool visitTrivial(QualType QT) { return false; }
-  // Volatile fields are considered trivial.
-  bool visitVolatileTrivial(QualType QT) { return false; }
-
-  bool visitStruct(QualType QT) {
-    const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
-    // We don't want to apply the C restriction in C++ because C++
-    // (1) can apply the restriction at a finer grain by banning copying or
-    //     destroying the union, and
-    // (2) allows users to override these restrictions by declaring explicit
-    //     constructors/etc, which we're not proposing to add to C.
-    if (isa<CXXRecordDecl>(RD))
-      return false;
-    for (const FieldDecl *FD : RD->fields())
-      if (this->asDerived().visit(FD->getType()))
-        return true;
-    return false;
-  }
-
-  const ASTContext &Ctx;
-};
+bool QualType::hasNonTrivialToPrimitiveDefaultInitializeCUnion(const RecordDecl *RD) {
+  return RD->hasNonTrivialToPrimitiveDefaultInitializeCUnion();
+}
 
-} // namespace
+bool QualType::hasNonTrivialToPrimitiveDestructCUnion(const RecordDecl *RD) {
+  return RD->hasNonTrivialToPrimitiveDestructCUnion();
+}
 
-bool QualType::isNonTrivialPrimitiveCType(const ASTContext &Ctx) const {
-  if (isNonTrivialToPrimitiveDefaultInitialize())
-    return true;
-  DestructionKind DK = isDestructedType();
-  if (DK != DK_none && DK != DK_cxx_destructor)
-    return true;
-  if (IsNonTrivialCopyMoveVisitor<false>(Ctx).visit(*this))
-    return true;
-  if (IsNonTrivialCopyMoveVisitor<true>(Ctx).visit(*this))
-    return true;
-  return false;
+bool QualType::hasNonTrivialToPrimitiveCopyCUnion(const RecordDecl *RD) {
+  return RD->hasNonTrivialToPrimitiveCopyCUnion();
 }
 
 QualType::PrimitiveDefaultInitializeKind
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 3941643893af6..11fed28b52db0 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -1658,12 +1658,24 @@ static void markEscapingByrefs(const FunctionScopeInfo &FSI, Sema &S) {
   // Set the EscapingByref flag of __block variables captured by
   // escaping blocks.
   for (const BlockDecl *BD : FSI.Blocks) {
-    if (BD->doesNotEscape())
-      continue;
     for (const BlockDecl::Capture &BC : BD->captures()) {
       VarDecl *VD = BC.getVariable();
-      if (VD->hasAttr<BlocksAttr>())
+      if (VD->hasAttr<BlocksAttr>()) {
+        // Nothing to do if this is a __block variable captured by a
+        // non-escaping block.
+        if (BD->doesNotEscape())
+          continue;
         VD->setEscapingByref();
+      }
+      // Check whether the captured variable is or contains an object of
+      // non-trivial C union type.
+      QualType CapType = BC.getVariable()->getType();
+      if (CapType.hasNonTrivialToPrimitiveDestructCUnion() ||
+          CapType.hasNonTrivialToPrimitiveCopyCUnion())
+        S.checkNonTrivialCUnion(BC.getVariable()->getType(),
+                                BD->getCaretLocation(),
+                                Sema::NTCUC_BlockCapture,
+                                Sema::NTCUK_Destruct|Sema::NTCUK_Copy);
     }
   }
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index b2a0632c6e7f1..73407afb49f34 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -22,6 +22,7 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/EvaluatedExprVisitor.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/AST/NonTrivialTypeVisitor.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/PartialDiagnostic.h"
@@ -6504,6 +6505,11 @@ NamedDecl *Sema::ActOnVariableDeclarator(
 
     if (D.isInvalidType())
       NewVD->setInvalidDecl();
+
+    if (NewVD->getType().hasNonTrivialToPrimitiveDestructCUnion() &&
+        NewVD->hasLocalStorage())
+      checkNonTrivialCUnion(NewVD->getType(), NewVD->getLocation(),
+                            NTCUC_AutoVar, NTCUK_Destruct);
   } else {
     bool Invalid = false;
 
@@ -8924,6 +8930,12 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
             << FunctionType::getNameForCallConv(CC);
       }
     }
+
+   if (NewFD->getReturnType().hasNonTrivialToPrimitiveDestructCUnion() ||
+       NewFD->getReturnType().hasNonTrivialToPrimitiveCopyCUnion())
+     checkNonTrivialCUnion(NewFD->getReturnType(),
+                           NewFD->getReturnTypeSourceRange().getBegin(),
+                           NTCUC_FunctionReturn, NTCUK_Destruct|NTCUK_Copy);
   } else {
     // C++11 [replacement.functions]p3:
     //  The program's definitions shall not be specified as inline.
@@ -11070,6 +11082,263 @@ bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit,
   return VDecl->isInvalidDecl();
 }
 
+/// Diagnose non-trivial C unions default-initialized or copied by \p Init.
+void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init,
+                                              SourceLocation Loc) {
+  if (auto *CE = dyn_cast<ConstantExpr>(Init))
+    Init = CE->getSubExpr();
+
+  QualType InitType = Init->getType();
+  assert((InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
+          InitType.hasNonTrivialToPrimitiveCopyCUnion()) &&
+         "shouldn't be called if type doesn't have a non-trivial C union");
+  if (auto *ILE = dyn_cast<InitListExpr>(Init)) {
+    for (auto *I : ILE->inits()) {
+      if (!I->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion() &&
+          !I->getType().hasNonTrivialToPrimitiveCopyCUnion())
+        continue;
+      SourceLocation SL = I->getExprLoc();
+      checkNonTrivialCUnionInInitializer(I, SL.isValid() ? SL : Loc);
+    }
+    return;
+  }
+
+  if (isa<ImplicitValueInitExpr>(Init)) {
+    if (InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion())
+      checkNonTrivialCUnion(InitType, Loc, NTCUC_DefaultInitializedObject,
+                            NTCUK_Init);
+  } else {
+    // Assume every other explicit initializer copies some existing object.
+    // TODO: ignore explicit initializers where copy-elision is guaranteed.
+    if (InitType.hasNonTrivialToPrimitiveCopyCUnion())
+      checkNonTrivialCUnion(InitType, Loc, NTCUC_CopyInit, NTCUK_Copy);
+  }
+}
+
+namespace {
+
+/// Emits the error and notes for C unions non-trivial to default-initialize.
+struct DiagNonTrivalCUnionDefaultInitializeVisitor
+    : DefaultInitializedTypeVisitor<DiagNonTrivalCUnionDefaultInitializeVisitor,
+                                    void> {
+  using Super =
+      DefaultInitializedTypeVisitor<DiagNonTrivalCUnionDefaultInitializeVisitor,
+                                    void>;
+
+  DiagNonTrivalCUnionDefaultInitializeVisitor(
+      QualType OrigTy, SourceLocation OrigLoc,
+      Sema::NonTrivialCUnionContext UseContext, Sema &S)
+      : OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {}
+
+  void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType QT,
+                     const FieldDecl *FD, bool InNonTrivialUnion) {
+    if (const auto *AT = S.Context.getAsArrayType(QT))
+      return this->asDerived().visit(S.Context.getBaseElementType(AT), FD,
+                                     InNonTrivialUnion);
+    return Super::visitWithKind(PDIK, QT, FD, InNonTrivialUnion);
+  }
+
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      bool InNonTrivialUnion) {
+    if (InNonTrivialUnion)
+      S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
+          << 1 << 0 << QT << FD->getName();
+  }
+
+  void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
+    if (InNonTrivialUnion)
+      S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
+          << 1 << 0 << QT << FD->getName();
+  }
+
+  void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
+    const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
+    if (RD->isUnion()) {
+      if (OrigLoc.isValid()) {
+        bool IsUnion = false;
+        if (auto *OrigRD = OrigTy->getAsRecordDecl())
+          IsUnion = OrigRD->isUnion();
+        S.Diag(OrigLoc, diag::err_non_trivial_c_union_in_invalid_context)
+            << 0 << OrigTy << IsUnion << UseContext;
+        // Reset OrigLoc so that this diagnostic is emitted only once.
+        OrigLoc = SourceLocation();
+      }
+      InNonTrivialUnion = true;
+    }
+
+    if (InNonTrivialUnion)
+      S.Diag(RD->getLocation(), diag::note_non_trivial_c_union)
+          << 0 << 0 << QT.getUnqualifiedType() << "";
+
+    for (const FieldDecl *FD : RD->fields())
+      asDerived().visit(FD->getType(), FD, InNonTrivialUnion);
+  }
+
+  void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {}
+
+  // The type being checked: a non-trivial C union or a record containing one.
+  QualType OrigTy;
+  SourceLocation OrigLoc;
+  Sema::NonTrivialCUnionContext UseContext;
+  Sema &S;
+};
+
+/// Emits the error and notes for C unions that are non-trivial to destruct.
+struct DiagNonTrivalCUnionDestructedTypeVisitor
+    : DestructedTypeVisitor<DiagNonTrivalCUnionDestructedTypeVisitor, void> {
+  using Super =
+      DestructedTypeVisitor<DiagNonTrivalCUnionDestructedTypeVisitor, void>;
+
+  DiagNonTrivalCUnionDestructedTypeVisitor(
+      QualType OrigTy, SourceLocation OrigLoc,
+      Sema::NonTrivialCUnionContext UseContext, Sema &S)
+      : OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {}
+
+  void visitWithKind(QualType::DestructionKind DK, QualType QT,
+                     const FieldDecl *FD, bool InNonTrivialUnion) {
+    if (const auto *AT = S.Context.getAsArrayType(QT))
+      return this->asDerived().visit(S.Context.getBaseElementType(AT), FD,
+                                     InNonTrivialUnion);
+    return Super::visitWithKind(DK, QT, FD, InNonTrivialUnion);
+  }
+
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      bool InNonTrivialUnion) {
+    if (InNonTrivialUnion)
+      S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
+          << 1 << 1 << QT << FD->getName();
+  }
+
+  void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
+    if (InNonTrivialUnion)
+      S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
+          << 1 << 1 << QT << FD->getName();
+  }
+
+  void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
+    const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
+    if (RD->isUnion()) {
+      if (OrigLoc.isValid()) {
+        bool IsUnion = false;
+        if (auto *OrigRD = OrigTy->getAsRecordDecl())
+          IsUnion = OrigRD->isUnion();
+        S.Diag(OrigLoc, diag::err_non_trivial_c_union_in_invalid_context)
+            << 1 << OrigTy << IsUnion << UseContext;
+        // Reset OrigLoc so that this diagnostic is emitted only once.
+        OrigLoc = SourceLocation();
+      }
+      InNonTrivialUnion = true;
+    }
+
+    if (InNonTrivialUnion)
+      S.Diag(RD->getLocation(), diag::note_non_trivial_c_union)
+          << 0 << 1 << QT.getUnqualifiedType() << "";
+
+    for (const FieldDecl *FD : RD->fields())
+      asDerived().visit(FD->getType(), FD, InNonTrivialUnion);
+  }
+
+  void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {}
+  void visitCXXDestructor(QualType QT, const FieldDecl *FD,
+                          bool InNonTrivialUnion) {}
+
+  // The type being checked: a non-trivial C union or a record containing one.
+  QualType OrigTy;
+  SourceLocation OrigLoc;
+  Sema::NonTrivialCUnionContext UseContext;
+  Sema &S;
+};
+
+/// Emits the error and notes for C unions that are non-trivial to copy.
+struct DiagNonTrivalCUnionCopyVisitor
+    : CopiedTypeVisitor<DiagNonTrivalCUnionCopyVisitor, false, void> {
+  using Super = CopiedTypeVisitor<DiagNonTrivalCUnionCopyVisitor, false, void>;
+
+  DiagNonTrivalCUnionCopyVisitor(QualType OrigTy, SourceLocation OrigLoc,
+                                 Sema::NonTrivialCUnionContext UseContext,
+                                 Sema &S)
+      : OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {}
+
+  void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType QT,
+                     const FieldDecl *FD, bool InNonTrivialUnion) {
+    if (const auto *AT = S.Context.getAsArrayType(QT))
+      return this->asDerived().visit(S.Context.getBaseElementType(AT), FD,
+                                     InNonTrivialUnion);
+    return Super::visitWithKind(PCK, QT, FD, InNonTrivialUnion);
+  }
+
+  void visitARCStrong(QualType QT, const FieldDecl *FD,
+                      bool InNonTrivialUnion) {
+    if (InNonTrivialUnion)
+      S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
+          << 1 << 2 << QT << FD->getName();
+  }
+
+  void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
+    if (InNonTrivialUnion)
+      S.Diag(FD->getLocation(), diag::note_non_trivial_c_union)
+          << 1 << 2 << QT << FD->getName();
+  }
+
+  void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {
+    const RecordDecl *RD = QT->castAs<RecordType>()->getDecl();
+    if (RD->isUnion()) {
+      if (OrigLoc.isValid()) {
+        bool IsUnion = false;
+        if (auto *OrigRD = OrigTy->getAsRecordDecl())
+          IsUnion = OrigRD->isUnion();
+        S.Diag(OrigLoc, diag::err_non_trivial_c_union_in_invalid_context)
+            << 2 << OrigTy << IsUnion << UseContext;
+        // Reset OrigLoc so that this diagnostic is emitted only once.
+        OrigLoc = SourceLocation();
+      }
+      InNonTrivialUnion = true;
+    }
+
+    if (InNonTrivialUnion)
+      S.Diag(RD->getLocation(), diag::note_non_trivial_c_union)
+          << 0 << 2 << QT.getUnqualifiedType() << "";
+
+    for (const FieldDecl *FD : RD->fields())
+      asDerived().visit(FD->getType(), FD, InNonTrivialUnion);
+  }
+
+  void preVisit(QualType::PrimitiveCopyKind PCK, QualType QT,
+                const FieldDecl *FD, bool InNonTrivialUnion) {}
+  void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {}
+  void visitVolatileTrivial(QualType QT, const FieldDecl *FD,
+                            bool InNonTrivialUnion) {}
+
+  // The type being checked: a non-trivial C union or a record containing one.
+  QualType OrigTy;
+  SourceLocation OrigLoc;
+  Sema::NonTrivialCUnionContext UseContext;
+  Sema &S;
+};
+
+} // namespace
+
+void Sema::checkNonTrivialCUnion(QualType QT, SourceLocation Loc,
+                                 NonTrivialCUnionContext UseContext,
+                                 unsigned NonTrivialKind) {
+  assert((QT.hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
+          QT.hasNonTrivialToPrimitiveDestructCUnion() ||
+          QT.hasNonTrivialToPrimitiveCopyCUnion()) &&
+         "shouldn't be called if type doesn't have a non-trivial C union");
+  // Run each requested visitor, but only where QT has that kind of union.
+  if ((NonTrivialKind & NTCUK_Init) &&
+      QT.hasNonTrivialToPrimitiveDefaultInitializeCUnion())
+    DiagNonTrivalCUnionDefaultInitializeVisitor(QT, Loc, UseContext, *this)
+        .visit(QT, nullptr, false);
+  if ((NonTrivialKind & NTCUK_Destruct) &&
+      QT.hasNonTrivialToPrimitiveDestructCUnion())
+    DiagNonTrivalCUnionDestructedTypeVisitor(QT, Loc, UseContext, *this)
+        .visit(QT, nullptr, false);
+  if ((NonTrivialKind & NTCUK_Copy) && QT.hasNonTrivialToPrimitiveCopyCUnion())
+    DiagNonTrivalCUnionCopyVisitor(QT, Loc, UseContext, *this)
+        .visit(QT, nullptr, false);
+}
+
 /// AddInitializerToDecl - Adds the initializer Init to the
 /// declaration dcl. If DirectInit is true, this is C++ direct
 /// initialization rather than copy initialization.
@@ -11475,6 +11744,12 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
       CheckForConstantInitializer(Init, DclT);
   }
 
+  QualType InitType = Init->getType();
+  if (!InitType.isNull() &&
+      (InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
+       InitType.hasNonTrivialToPrimitiveCopyCUnion()))
+    checkNonTrivialCUnionInInitializer(Init, Init->getExprLoc());
+
   // We will represent direct-initialization similarly to copy-initialization:
   //    int x(1);  -as-> int x = 1;
   //    ClassType x(a,b,c); -as-> ClassType x = ClassType(a,b,c);
@@ -11599,7 +11874,14 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) {
       return;
     }
 
-    switch (Var->isThisDeclarationADefinition()) {
+    VarDecl::DefinitionKind DefKind = Var->isThisDeclarationADefinition();
+    if (!Var->isInvalidDecl() && DefKind != VarDecl::DeclarationOnly &&
+        Var->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion())
+      checkNonTrivialCUnion(Var->getType(), Var->getLocation(),
+                            NTCUC_DefaultInitializedObject, NTCUK_Init);
+
+
+    switch (DefKind) {
     case VarDecl::Definition:
       if (!Var->isStaticDataMember() || !Var->getAnyInitializer())
         break;
@@ -12692,6 +12974,11 @@ ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc,
                                          Context.getAdjustedParameterType(T),
                                          TSInfo, SC, nullptr);
 
+  if (New->getType().hasNonTrivialToPrimitiveDestructCUnion() ||
+      New->getType().hasNonTrivialToPrimitiveCopyCUnion())
+    checkNonTrivialCUnion(New->getType(), New->getLocation(),
+                          NTCUC_FunctionParam, NTCUK_Destruct|NTCUK_Copy);
+
   // Parameters can not be abstract class types.
   // For record types, this is done by the AbstractClassUsageDiagnoser once
   // the class has been completely parsed.
@@ -15938,7 +16225,6 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
   // Verify that all the fields are okay.
   SmallVector<FieldDecl*, 32> RecFields;
 
-  bool ObjCFieldLifetimeErrReported = false;
   for (ArrayRef<Decl *>::iterator i = Fields.begin(), end = Fields.end();
        i != end; ++i) {
     FieldDecl *FD = cast<FieldDecl>(*i);
@@ -16077,38 +16363,12 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
         Record->setHasObjectMember(true);
       if (Record && FDTTy->getDecl()->hasVolatileMember())
         Record->setHasVolatileMember(true);
-      if (Record && Record->isUnion() &&
-          FD->getType().isNonTrivialPrimitiveCType(Context))
-        Diag(FD->getLocation(),
-             diag::err_nontrivial_primitive_type_in_union);
     } else if (FDTy->isObjCObjectType()) {
       /// A field cannot be an Objective-c object
       Diag(FD->getLocation(), diag::err_statically_allocated_object)
         << FixItHint::CreateInsertion(FD->getLocation(), "*");
       QualType T = Context.getObjCObjectPointerType(FD->getType());
       FD->setType(T);
-    } else if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() &&
-               Record && !ObjCFieldLifetimeErrReported && Record->isUnion() &&
-               !getLangOpts().CPlusPlus) {
-      // It's an error in ARC or Weak if a field has lifetime.
-      // We don't want to report this in a system header, though,
-      // so we just make the field unavailable.
-      // FIXME: that's really not sufficient; we need to make the type
-      // itself invalid to, say, initialize or copy.
-      QualType T = FD->getType();
-      if (T.hasNonTrivialObjCLifetime()) {
-        SourceLocation loc = FD->getLocation();
-        if (getSourceManager().isInSystemHeader(loc)) {
-          if (!FD->hasAttr<UnavailableAttr>()) {
-            FD->addAttr(UnavailableAttr::CreateImplicit(Context, "",
-                          UnavailableAttr::IR_ARCFieldWithOwnership, loc));
-          }
-        } else {
-          Diag(FD->getLocation(), diag::err_arc_objc_object_in_tag)
-            << T->isBlockPointerType() << Record->getTagKind();
-        }
-        ObjCFieldLifetimeErrReported = true;
-      }
     } else if (getLangOpts().ObjC &&
                getLangOpts().getGC() != LangOptions::NonGC &&
                Record && !Record->hasObjectMember()) {
@@ -16128,14 +16388,23 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
 
     if (Record && !getLangOpts().CPlusPlus && !FD->hasAttr<UnavailableAttr>()) {
       QualType FT = FD->getType();
-      if (FT.isNonTrivialToPrimitiveDefaultInitialize())
+      if (FT.isNonTrivialToPrimitiveDefaultInitialize()) {
         Record->setNonTrivialToPrimitiveDefaultInitialize(true);
+        if (FT.hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
+            Record->isUnion())
+          Record->setHasNonTrivialToPrimitiveDefaultInitializeCUnion(true);
+      }
       QualType::PrimitiveCopyKind PCK = FT.isNonTrivialToPrimitiveCopy();
-      if (PCK != QualType::PCK_Trivial && PCK != QualType::PCK_VolatileTrivial)
+      if (PCK != QualType::PCK_Trivial && PCK != QualType::PCK_VolatileTrivial) {
         Record->setNonTrivialToPrimitiveCopy(true);
+        if (FT.hasNonTrivialToPrimitiveCopyCUnion() || Record->isUnion())
+          Record->setHasNonTrivialToPrimitiveCopyCUnion(true);
+      }
       if (FT.isDestructedType()) {
         Record->setNonTrivialToPrimitiveDestroy(true);
         Record->setParamDestroyedInCallee(true);
+        if (FT.hasNonTrivialToPrimitiveDestructCUnion() || Record->isUnion())
+          Record->setHasNonTrivialToPrimitiveDestructCUnion(true);
       }
 
       if (const auto *RT = FT->getAs<RecordType>()) {
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 746d3e7e11901..1e49a363ab330 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6066,7 +6066,7 @@ Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo,
         ILE->setInit(i, ConstantExpr::Create(Context, Init));
       }
 
-  Expr *E = new (Context) CompoundLiteralExpr(LParenLoc, TInfo, literalType,
+  auto *E = new (Context) CompoundLiteralExpr(LParenLoc, TInfo, literalType,
                                               VK, LiteralExpr, isFileScope);
   if (isFileScope) {
     if (!LiteralExpr->isTypeDependent() &&
@@ -6084,6 +6084,19 @@ Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo,
     return ExprError();
   }
 
+  // Compound literals that have automatic storage duration are destroyed at
+  // the end of the scope. Emit diagnostics if it is or contains a C union type
+  // that is non-trivial to destruct.
+  if (!isFileScope)
+    if (E->getType().hasNonTrivialToPrimitiveDestructCUnion())
+      checkNonTrivialCUnion(E->getType(), E->getExprLoc(),
+                            NTCUC_CompoundLiteral, NTCUK_Destruct);
+
+  if (E->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion() ||
+      E->getType().hasNonTrivialToPrimitiveCopyCUnion())
+    checkNonTrivialCUnionInInitializer(E->getInitializer(),
+                                       E->getInitializer()->getExprLoc());
+
   return MaybeBindToTemporary(E);
 }
 
@@ -12533,6 +12546,10 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc,
           if (auto *VD = dyn_cast<VarDecl>(DRE->getDecl()))
             if (VD->hasLocalStorage() && getCurScope()->isDeclScope(VD))
               BE->getBlockDecl()->setCanAvoidCopyToHeap();
+
+      if (LHS.get()->getType().hasNonTrivialToPrimitiveCopyCUnion())
+        checkNonTrivialCUnion(LHS.get()->getType(), LHS.get()->getExprLoc(),
+                              NTCUC_Assignment, NTCUK_Copy);
     }
     RecordModifiableNonNullParam(*this, LHS.get());
     break;
@@ -13945,6 +13962,11 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
       !BD->isDependentContext())
     computeNRVO(Body, BSI);
 
+  if (RetTy.hasNonTrivialToPrimitiveDestructCUnion() ||
+      RetTy.hasNonTrivialToPrimitiveCopyCUnion())
+    checkNonTrivialCUnion(RetTy, BD->getCaretLocation(), NTCUC_FunctionReturn,
+                          NTCUK_Destruct|NTCUK_Copy);
+
   PopDeclContext();
 
   // Pop the block scope now but keep it alive to the end of this function.
@@ -16196,6 +16218,15 @@ static ExprResult rebuildPotentialResultsAsNonOdrUsed(Sema &S, Expr *E,
 }
 
 ExprResult Sema::CheckLValueToRValueConversionOperand(Expr *E) {
+  // Check whether the operand is or contains an object of non-trivial C union
+  // type.
+  if (E->getType().isVolatileQualified() &&
+      (E->getType().hasNonTrivialToPrimitiveDestructCUnion() ||
+       E->getType().hasNonTrivialToPrimitiveCopyCUnion()))
+    checkNonTrivialCUnion(E->getType(), E->getExprLoc(),
+                          Sema::NTCUC_LValueToRValueVolatile,
+                          NTCUK_Destruct|NTCUK_Copy);
+
   // C++2a [basic.def.odr]p4:
   //   [...] an expression of non-volatile-qualified non-class type to which
   //   the lvalue-to-rvalue conversion is applied [...]
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 88b544068a802..514cbd90d3b85 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -2456,6 +2456,11 @@ bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) {
     return true;
   }
 
+  if (T.hasNonTrivialToPrimitiveDestructCUnion() ||
+      T.hasNonTrivialToPrimitiveCopyCUnion())
+    checkNonTrivialCUnion(T, Loc, NTCUC_FunctionReturn,
+                          NTCUK_Destruct|NTCUK_Copy);
+
   return false;
 }
 
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index b40e3cf892650..3cac82ad421c0 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -794,6 +794,9 @@ ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) {
   RD->setNonTrivialToPrimitiveDefaultInitialize(Record.readInt());
   RD->setNonTrivialToPrimitiveCopy(Record.readInt());
   RD->setNonTrivialToPrimitiveDestroy(Record.readInt());
+  RD->setHasNonTrivialToPrimitiveDefaultInitializeCUnion(Record.readInt());
+  RD->setHasNonTrivialToPrimitiveDestructCUnion(Record.readInt());
+  RD->setHasNonTrivialToPrimitiveCopyCUnion(Record.readInt());
   RD->setParamDestroyedInCallee(Record.readInt());
   RD->setArgPassingRestrictions((RecordDecl::ArgPassingKind)Record.readInt());
   return Redecl;
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 3d9dd7131b1de..b71315505de90 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -476,6 +476,9 @@ void ASTDeclWriter::VisitRecordDecl(RecordDecl *D) {
   Record.push_back(D->isNonTrivialToPrimitiveDefaultInitialize());
   Record.push_back(D->isNonTrivialToPrimitiveCopy());
   Record.push_back(D->isNonTrivialToPrimitiveDestroy());
+  Record.push_back(D->hasNonTrivialToPrimitiveDefaultInitializeCUnion());
+  Record.push_back(D->hasNonTrivialToPrimitiveDestructCUnion());
+  Record.push_back(D->hasNonTrivialToPrimitiveCopyCUnion());
   Record.push_back(D->isParamDestroyedInCallee());
   Record.push_back(D->getArgPassingRestrictions());
 
@@ -1999,6 +2002,12 @@ void ASTWriter::WriteDeclAbbrevs() {
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
   // isNonTrivialToPrimitiveDestroy
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+  // hasNonTrivialToPrimitiveDefaultInitializeCUnion
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+  // hasNonTrivialToPrimitiveDestructCUnion
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
+  // hasNonTrivialToPrimitiveCopyCUnion
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
   // isParamDestroyedInCallee
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));
   // getArgPassingRestrictions
diff --git a/clang/test/CodeGenObjC/Inputs/strong_in_union.h b/clang/test/CodeGenObjC/Inputs/strong_in_union.h
deleted file mode 100644
index abe4549055c60..0000000000000
--- a/clang/test/CodeGenObjC/Inputs/strong_in_union.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef STRONG_IN_UNION_H
-#define STRONG_IN_UNION_H
-#pragma clang system_header
-
-typedef union {
-  id f0;
-  int *f1;
-} U;
-
-#endif // STRONG_IN_UNION_H
diff --git a/clang/test/CodeGenObjC/strong-in-c-struct.m b/clang/test/CodeGenObjC/strong-in-c-struct.m
index 19cc1037c4cad..8eeee4af0d30a 100644
--- a/clang/test/CodeGenObjC/strong-in-c-struct.m
+++ b/clang/test/CodeGenObjC/strong-in-c-struct.m
@@ -1,11 +1,10 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -emit-llvm -o - -DUSESTRUCT -I %S/Inputs %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -emit-llvm -o - -DUSESTRUCT %s | FileCheck %s
 
-// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -emit-pch -I %S/Inputs -o %t %s
-// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -include-pch %t -emit-llvm -o - -DUSESTRUCT -I %S/Inputs %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks  -fobjc-runtime=ios-11.0 -include-pch %t -emit-llvm -o - -DUSESTRUCT %s | FileCheck %s
 
 #ifndef HEADER
 #define HEADER
-#include "strong_in_union.h"
 
 typedef void (^BlockTy)(void);
 
@@ -695,14 +694,6 @@ void test_copy_constructor_Bitfield1(Bitfield1 *a) {
   Bitfield1 t = *a;
 }
 
-// CHECK: define void @test_strong_in_union()
-// CHECK: alloca %{{.*}}
-// CHECK-NEXT: ret void
-
-void test_strong_in_union() {
-  U t;
-}
-
 // CHECK: define void @test_copy_constructor_VolatileArray(
 // CHECK: call void @__copy_constructor_8_8_s0_AB8s4n16_tv64w32_AE(
 
diff --git a/clang/test/PCH/non-trivial-c-union.m b/clang/test/PCH/non-trivial-c-union.m
new file mode 100644
index 0000000000000..abd335497db26
--- /dev/null
+++ b/clang/test/PCH/non-trivial-c-union.m
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -fblocks -fobjc-arc -fobjc-runtime-has-weak -emit-pch -o %t.pch %s
+// RUN: %clang_cc1 -fblocks -fobjc-arc -fobjc-runtime-has-weak -include-pch %t.pch -verify %s
+
+#ifndef HEADER
+#define HEADER
+
+typedef union {
+  id f0;
+} U0;
+
+#else
+
+// expected-note@-6 {{'U0' has subobjects that are non-trivial to destruct}}
+// expected-note@-7 {{'U0' has subobjects that are non-trivial to copy}}
+// expected-note@-8 {{'U0' has subobjects that are non-trivial to default-initialize}}
+// expected-note@-8 {{f0 has type '__strong id' that is non-trivial to destruct}}
+// expected-note@-9 {{f0 has type '__strong id' that is non-trivial to copy}}
+// expected-note@-10 {{f0 has type '__strong id' that is non-trivial to default-initialize}}
+
+U0 foo0(void); // expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to copy}}
+
+U0 g0; // expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+
+#endif
diff --git a/clang/test/SemaObjC/arc-decls.m b/clang/test/SemaObjC/arc-decls.m
index 0abd45dac33e2..28c3de996996d 100644
--- a/clang/test/SemaObjC/arc-decls.m
+++ b/clang/test/SemaObjC/arc-decls.m
@@ -8,11 +8,7 @@
 };
 
 union u {
-    id u; // expected-error {{ARC forbids Objective-C objects in union}}
-};
-
-union u_nontrivial_c {
-  struct A a; // expected-error {{non-trivial C types are disallowed in union}}
+  id u;
 };
 
 // Volatile fields are fine.
diff --git a/clang/test/SemaObjC/non-trivial-c-union.m b/clang/test/SemaObjC/non-trivial-c-union.m
new file mode 100644
index 0000000000000..7bd82775451c8
--- /dev/null
+++ b/clang/test/SemaObjC/non-trivial-c-union.m
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -fsyntax-only -fblocks -fobjc-arc -fobjc-runtime-has-weak -verify %s
+
+typedef union { // expected-note 12 {{'U0' has subobjects that are non-trivial to default-initialize}} expected-note 36 {{'U0' has subobjects that are non-trivial to destruct}} expected-note 28 {{'U0' has subobjects that are non-trivial to copy}}
+  id f0; // expected-note 12 {{f0 has type '__strong id' that is non-trivial to default-initialize}} expected-note 36 {{f0 has type '__strong id' that is non-trivial to destruct}} expected-note 28 {{f0 has type '__strong id' that is non-trivial to copy}}
+  __weak id f1; // expected-note 12 {{f1 has type '__weak id' that is non-trivial to default-initialize}} expected-note 36 {{f1 has type '__weak id' that is non-trivial to destruct}} expected-note 28 {{f1 has type '__weak id' that is non-trivial to copy}}
+} U0;
+
+typedef struct {
+  U0 f0;
+  id f1;
+} S0;
+
+id g0;
+U0 ug0; // expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+U0 ug1 = { .f0 = 0 };
+S0 sg0; // expected-error {{cannot default-initialize an object of type 'S0' since it contains a union that is non-trivial to default-initialize}}
+S0 sg1 = { .f0 = {0}, .f1 = 0 };
+S0 sg2 = { .f1 = 0 }; // expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+
+U0 foo0(U0); // expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to copy}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to copy}}
+S0 foo1(S0); // expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to copy}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to copy}}
+
+@interface C
+-(U0)m0:(U0)arg; // expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to copy}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to copy}}
+-(S0)m1:(S0)arg; // expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to copy}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to copy}}
+@end
+
+void testBlockFunction(void) {
+  (void)^(U0 a){ return ug0; }; // expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to copy}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to copy}}
+  (void)^(S0 a){ return sg0; }; // expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to copy}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to copy}}
+}
+void testAutoVar(void) {
+  U0 u0; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+  U0 u1 = ug0; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}}
+  U0 u2 = { g0 }; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}}
+  U0 u3 = { .f1 = g0 }; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}}
+  S0 s0; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'S0' since it contains a union that is non-trivial to default-initialize}}
+  S0 s1 = sg0; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'S0' since it contains a union that is non-trivial to copy}}
+  S0 s2 = { ug0 }; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}}
+  S0 s3 = { .f0 = ug0 }; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}}
+  S0 s4 = { .f1 = g0 }; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+}
+
+void testAssignment(void) {
+  ug0 = ug1; // expected-error {{cannot assign to a variable of type 'U0' since it is a union that is non-trivial to copy}}
+  sg0 = sg1; // expected-error {{cannot assign to a variable of type 'S0' since it contains a union that is non-trivial to copy}}
+}
+
+U0 ug2 = (U0){ .f1 = 0 }; // expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}}
+S0 sg3 = (S0){ .f0 = {0}, .f1 = 0 }; // expected-error {{cannot copy-initialize an object of type 'S0' since it contains a union that is non-trivial to copy}}
+S0 *sg4 = &(S0){ .f1 = 0 }; // expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+
+void testCompoundLiteral(void) {
+  const U0 *t0 = &(U0){ .f0 = g0 }; // expected-error {{cannot construct an automatic compound literal of type 'U0' since it is a union that is non-trivial to destruct}}
+  const U0 *t1 = &(U0){ .f1 = g0 }; // expected-error {{cannot construct an automatic compound literal of type 'U0' since it is a union that is non-trivial to destruct}}
+  const S0 *t2 = &(S0){ .f0 = ug0 }; // expected-error {{cannot construct an automatic compound literal of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}}
+  const S0 *t3 = &(S0){ .f1 = g0 }; // expected-error {{cannot construct an automatic compound literal of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+}
+
+typedef void (^BlockTy)(void);
+void escapingFunc(BlockTy);
+void noescapingFunc(__attribute__((noescape)) BlockTy);
+
+void testBlockCapture(void) {
+  U0 t0; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+  S0 t1; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'S0' since it contains a union that is non-trivial to default-initialize}}
+  __block U0 t2; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}}
+  __block S0 t3; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'S0' since it contains a union that is non-trivial to default-initialize}}
+
+  escapingFunc(^{ g0 = t0.f0; }); // expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to copy}}
+  escapingFunc(^{ g0 = t1.f0.f0; }); // expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to copy}}
+  escapingFunc(^{ g0 = t2.f0; }); // expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to copy}}
+  escapingFunc(^{ g0 = t3.f0.f0; }); // expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to copy}}
+  noescapingFunc(^{ g0 = t0.f0; }); // expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to copy}}
+  noescapingFunc(^{ g0 = t1.f0.f0; }); // expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to copy}}
+  noescapingFunc(^{ g0 = t2.f0; });
+  noescapingFunc(^{ g0 = t3.f0.f0; });
+}
+
+void testVolatileLValueToRValue(volatile U0 *a) {
+  (void)*a; // expected-error {{cannot use volatile type 'volatile U0' where it causes an lvalue-to-rvalue conversion since it is a union that is non-trivial to destruct}} // expected-error {{cannot use volatile type 'volatile U0' where it causes an lvalue-to-rvalue conversion since it is a union that is non-trivial to copy}}
+}

From 087b044c4915717a51f8c0adb18eca5ae7a4f994 Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Sat, 13 Jul 2019 03:24:48 +0000
Subject: [PATCH 023/451] [clangd] Implement typeHierarchy/resolve for subtypes

Summary:
This allows the client to resolve subtypes one level at a time.

For supertypes, this is not necessary, because we eagerly compute
supertypes and return all levels.

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64308

llvm-svn: 365986
---
 clang-tools-extra/clangd/ClangdLSPServer.cpp  |  8 +++
 clang-tools-extra/clangd/ClangdLSPServer.h    |  2 +
 clang-tools-extra/clangd/ClangdServer.cpp     |  7 +++
 clang-tools-extra/clangd/ClangdServer.h       |  5 ++
 clang-tools-extra/clangd/Protocol.cpp         | 17 +++--
 clang-tools-extra/clangd/Protocol.h           | 23 +++++--
 clang-tools-extra/clangd/XRefs.cpp            | 25 +++++++-
 clang-tools-extra/clangd/XRefs.h              |  4 ++
 .../clangd/test/type-hierarchy.test           | 63 ++++++++++++++++++-
 .../clangd/unittests/TypeHierarchyTests.cpp   | 46 +++++++++++++-
 10 files changed, 187 insertions(+), 13 deletions(-)

diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index 2b25058c9672c..5f8b307f721a6 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -926,6 +926,13 @@ void ClangdLSPServer::onTypeHierarchy(
                         Params.resolve, Params.direction, std::move(Reply));
 }
 
+void ClangdLSPServer::onResolveTypeHierarchy(
+    const ResolveTypeHierarchyItemParams &Params,
+    Callback<Optional<TypeHierarchyItem>> Reply) {
+  Server->resolveTypeHierarchy(Params.item, Params.resolve, Params.direction,
+                               std::move(Reply));
+}
+
 void ClangdLSPServer::applyConfiguration(
     const ConfigurationSettings &Settings) {
   // Per-file update to the compilation database.
@@ -1021,6 +1028,7 @@ ClangdLSPServer::ClangdLSPServer(
   MsgHandler->bind("workspace/didChangeConfiguration", &ClangdLSPServer::onChangeConfiguration);
   MsgHandler->bind("textDocument/symbolInfo", &ClangdLSPServer::onSymbolInfo);
   MsgHandler->bind("textDocument/typeHierarchy", &ClangdLSPServer::onTypeHierarchy);
+  MsgHandler->bind("typeHierarchy/resolve", &ClangdLSPServer::onResolveTypeHierarchy);
   // clang-format on
 }
 
diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h
index 43e05b3dc8e55..1c37750a18526 100644
--- a/clang-tools-extra/clangd/ClangdLSPServer.h
+++ b/clang-tools-extra/clangd/ClangdLSPServer.h
@@ -100,6 +100,8 @@ class ClangdLSPServer : private DiagnosticsConsumer {
                Callback<llvm::Optional<Hover>>);
   void onTypeHierarchy(const TypeHierarchyParams &,
                        Callback<llvm::Optional<TypeHierarchyItem>>);
+  void onResolveTypeHierarchy(const ResolveTypeHierarchyItemParams &,
+                              Callback<llvm::Optional<TypeHierarchyItem>>);
   void onChangeConfiguration(const DidChangeConfigurationParams &);
   void onSymbolInfo(const TextDocumentPositionParams &,
                     Callback<std::vector<SymbolDetails>>);
diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 451dac6f0df4b..10949ef001c02 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -528,6 +528,13 @@ void ClangdServer::typeHierarchy(PathRef File, Position Pos, int Resolve,
   WorkScheduler.runWithAST("Type Hierarchy", File, Bind(Action, std::move(CB)));
 }
 
+void ClangdServer::resolveTypeHierarchy(
+    TypeHierarchyItem Item, int Resolve, TypeHierarchyDirection Direction,
+    Callback<llvm::Optional<TypeHierarchyItem>> CB) {
+  clangd::resolveTypeHierarchy(Item, Resolve, Direction, Index);
+  CB(std::move(Item)); // Hand over our by-value copy; avoids a deep copy.
+}
+
 void ClangdServer::onFileEvent(const DidChangeWatchedFilesParams &Params) {
   // FIXME: Do nothing for now. This will be used for indexing and potentially
   // invalidating other caches.
diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h
index ba39806eb72c4..fa6783b1f1c13 100644
--- a/clang-tools-extra/clangd/ClangdServer.h
+++ b/clang-tools-extra/clangd/ClangdServer.h
@@ -210,6 +210,11 @@ class ClangdServer {
                      TypeHierarchyDirection Direction,
                      Callback<llvm::Optional<TypeHierarchyItem>> CB);
 
+  /// Resolve type hierarchy item in the given direction.
+  void resolveTypeHierarchy(TypeHierarchyItem Item, int Resolve,
+                            TypeHierarchyDirection Direction,
+                            Callback<llvm::Optional<TypeHierarchyItem>> CB);
+
   /// Retrieve the top symbols from the workspace matching a query.
   void workspaceSymbols(StringRef Query, int Limit,
                         Callback<std::vector<SymbolInformation>> CB);
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index 7c70afb567df9..600896b1eeee6 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -422,8 +422,7 @@ bool fromJSON(const llvm::json::Value &Params,
 bool fromJSON(const llvm::json::Value &Params,
               DocumentRangeFormattingParams &R) {
   llvm::json::ObjectMapper O(Params);
-  return O && O.map("textDocument", R.textDocument) &&
-         O.map("range", R.range);
+  return O && O.map("textDocument", R.textDocument) && O.map("range", R.range);
 }
 
 bool fromJSON(const llvm::json::Value &Params,
@@ -445,8 +444,8 @@ bool fromJSON(const llvm::json::Value &Params, DocumentSymbolParams &R) {
 
 llvm::json::Value toJSON(const DiagnosticRelatedInformation &DRI) {
   return llvm::json::Object{
-    {"location", DRI.location},
-    {"message", DRI.message},
+      {"location", DRI.location},
+      {"message", DRI.message},
   };
 }
 
@@ -978,6 +977,8 @@ llvm::json::Value toJSON(const TypeHierarchyItem &I) {
     Result["parents"] = I.parents;
   if (I.children)
     Result["children"] = I.children;
+  if (I.data)
+    Result["data"] = I.data;
   return std::move(Result);
 }
 
@@ -996,10 +997,18 @@ bool fromJSON(const llvm::json::Value &Params, TypeHierarchyItem &I) {
   O.map("deprecated", I.deprecated);
   O.map("parents", I.parents);
   O.map("children", I.children);
+  O.map("data", I.data);
 
   return true;
 }
 
+bool fromJSON(const llvm::json::Value &Params,
+              ResolveTypeHierarchyItemParams &P) {
+  llvm::json::ObjectMapper O(Params);
+  return O && O.map("item", P.item) && O.map("resolve", P.resolve) &&
+         O.map("direction", P.direction);
+}
+
 bool fromJSON(const llvm::json::Value &Params, ReferenceParams &R) {
   TextDocumentPositionParams &Base = R;
   return fromJSON(Params, Base);
diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h
index 7a1a8c77d2591..a2c9438ea9d5d 100644
--- a/clang-tools-extra/clangd/Protocol.h
+++ b/clang-tools-extra/clangd/Protocol.h
@@ -1127,7 +1127,7 @@ struct TypeHierarchyItem {
   SymbolKind kind;
 
   /// `true` if the hierarchy item is deprecated. Otherwise, `false`.
-  bool deprecated;
+  bool deprecated = false;
 
   /// The URI of the text document where this type hierarchy item belongs to.
   URIForFile uri;
@@ -1153,13 +1153,26 @@ struct TypeHierarchyItem {
   /// descendants. If not defined, the children have not been resolved.
   llvm::Optional<std::vector<TypeHierarchyItem>> children;
 
-  /// The protocol has a slot here for an optional 'data' filed, which can
-  /// be used to identify a type hierarchy item in a resolve request. We don't
-  /// need this (the item itself is sufficient to identify what to resolve)
-  /// so don't declare it.
+  /// An optional 'data' field, which can be used to identify a type hierarchy
+  /// item in a resolve request.
+  llvm::Optional<std::string> data;
 };
 llvm::json::Value toJSON(const TypeHierarchyItem &);
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const TypeHierarchyItem &);
+bool fromJSON(const llvm::json::Value &, TypeHierarchyItem &);
+
+/// Parameters for the `typeHierarchy/resolve` request.
+struct ResolveTypeHierarchyItemParams {
+  /// The item to resolve.
+  TypeHierarchyItem item;
+
+  /// The hierarchy levels to resolve. `0` indicates no level.
+  int resolve = 0;
+
+  /// The direction of the hierarchy levels to resolve.
+  TypeHierarchyDirection direction = TypeHierarchyDirection::Parents;
+};
+bool fromJSON(const llvm::json::Value &, ResolveTypeHierarchyItemParams &);
 
 struct ReferenceParams : public TextDocumentPositionParams {
   // For now, no options like context.includeDeclaration are supported.
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 1d34499b36edf..59f07ee405eed 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -893,7 +893,7 @@ llvm::Optional<QualType> getDeducedType(ParsedAST &AST,
 
 /// Retrieves the deduced type at a given location (auto, decltype).
 bool hasDeducedType(ParsedAST &AST, SourceLocation SourceLocationBeg) {
-  return (bool) getDeducedType(AST, SourceLocationBeg);
+  return (bool)getDeducedType(AST, SourceLocationBeg);
 }
 
 llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos,
@@ -1104,6 +1104,10 @@ symbolToTypeHierarchyItem(const Symbol &S, const SymbolIndex *Index,
   // (https://github.com/clangd/clangd/issues/59).
   THI.range = THI.selectionRange;
   THI.uri = Loc->uri;
+  // Store the SymbolID in the 'data' field. The client will
+  // send this back in typeHierarchy/resolve, allowing us to
+  // continue resolving additional levels of the type hierarchy.
+  THI.data = S.ID.str();
 
   return std::move(THI);
 }
@@ -1247,6 +1251,25 @@ getTypeHierarchy(ParsedAST &AST, Position Pos, int ResolveLevels,
   return Result;
 }
 
+void resolveTypeHierarchy(TypeHierarchyItem &Item, int ResolveLevels,
+                          TypeHierarchyDirection Direction,
+                          const SymbolIndex *Index) {
+  // We only support typeHierarchy/resolve for children, because for parents
+  // we ignore ResolveLevels and return all levels of parents eagerly.
+  if (Direction == TypeHierarchyDirection::Parents || ResolveLevels == 0)
+    return;
+
+  Item.children.emplace();
+
+  if (Index && Item.data) {
+    // The client echoes back the SymbolID we stored in the 'data' field.
+    if (Expected<SymbolID> ID = SymbolID::fromStr(*Item.data))
+      fillSubTypes(*ID, *Item.children, Index, ResolveLevels, Item.uri.file());
+    else // Malformed ID: consume the error so ~Expected doesn't abort.
+      llvm::consumeError(ID.takeError());
+  }
+}
+
 FormattedString HoverInfo::present() const {
   FormattedString Output;
   if (NamespaceScope) {
diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h
index 318133a572a28..3044036c17f15 100644
--- a/clang-tools-extra/clangd/XRefs.h
+++ b/clang-tools-extra/clangd/XRefs.h
@@ -141,6 +141,10 @@ llvm::Optional<TypeHierarchyItem> getTypeHierarchy(
     ParsedAST &AST, Position Pos, int Resolve, TypeHierarchyDirection Direction,
     const SymbolIndex *Index = nullptr, PathRef TUPath = PathRef{});
 
+void resolveTypeHierarchy(TypeHierarchyItem &Item, int ResolveLevels,
+                          TypeHierarchyDirection Direction,
+                          const SymbolIndex *Index);
+
 /// Retrieves the deduced type at a given location (auto, decltype).
 /// Retuns None unless SourceLocationBeg starts an auto/decltype token.
 /// It will return the underlying type.
diff --git a/clang-tools-extra/clangd/test/type-hierarchy.test b/clang-tools-extra/clangd/test/type-hierarchy.test
index 7161bd143bff3..b2e78ae249dcc 100644
--- a/clang-tools-extra/clangd/test/type-hierarchy.test
+++ b/clang-tools-extra/clangd/test/type-hierarchy.test
@@ -1,7 +1,7 @@
 # RUN: clangd -lit-test < %s | FileCheck -strict-whitespace %s
 {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}}
 ---
-{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"struct Parent {};\nstruct Child1 : Parent {};\nstruct Child2 : Child1 {};\nstruct Child3 : Child2 {};"}}}
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"struct Parent {};\nstruct Child1 : Parent {};\nstruct Child2 : Child1 {};\nstruct Child3 : Child2 {};\nstruct Child4 : Child3 {};"}}}
 ---
 {"jsonrpc":"2.0","id":1,"method":"textDocument/typeHierarchy","params":{"textDocument":{"uri":"test:///main.cpp"},"position":{"line":2,"character":11},"direction":2,"resolve":1}}
 #      CHECK:  "id": 1
@@ -9,6 +9,7 @@
 # CHECK-NEXT:  "result": {
 # CHECK-NEXT:    "children": [
 # CHECK-NEXT:      {
+# CHECK-NEXT:        "data": "A6576FE083F2949A",
 # CHECK-NEXT:        "kind": 23,
 # CHECK-NEXT:        "name": "Child3",
 # CHECK-NEXT:        "range": {
@@ -114,6 +115,64 @@
 # CHECK-NEXT:    "uri": "file:///clangd-test/main.cpp"
 # CHECK-NEXT:  }
 ---
-{"jsonrpc":"2.0","id":2,"method":"shutdown"}
+{"jsonrpc":"2.0","id":2,"method":"typeHierarchy/resolve","params":{"item":{"uri":"test:///main.cpp","data":"A6576FE083F2949A","name":"Child3","kind":23,"range":{"end":{"character":13,"line":3},"start":{"character":7,"line":3}},"selectionRange":{"end":{"character":13,"line":3},"start":{"character":7,"line":3}}},"direction":0,"resolve":1}}
+#      CHECK:  "id": 2
+# CHECK-NEXT:  "jsonrpc": "2.0",
+# CHECK-NEXT:  "result": {
+# CHECK-NEXT:    "children": [
+# CHECK-NEXT:      {
+# CHECK-NEXT:        "data": "5705B382DFC77CBC",
+# CHECK-NEXT:        "kind": 23,
+# CHECK-NEXT:        "name": "Child4",
+# CHECK-NEXT:        "range": {
+# CHECK-NEXT:          "end": {
+# CHECK-NEXT:            "character": 13,
+# CHECK-NEXT:            "line": 4
+# CHECK-NEXT:          },
+# CHECK-NEXT:          "start": {
+# CHECK-NEXT:            "character": 7,
+# CHECK-NEXT:            "line": 4
+# CHECK-NEXT:          }
+# CHECK-NEXT:        },
+# CHECK-NEXT:        "selectionRange": {
+# CHECK-NEXT:          "end": {
+# CHECK-NEXT:            "character": 13,
+# CHECK-NEXT:            "line": 4
+# CHECK-NEXT:          },
+# CHECK-NEXT:          "start": {
+# CHECK-NEXT:            "character": 7,
+# CHECK-NEXT:            "line": 4
+# CHECK-NEXT:          }
+# CHECK-NEXT:        },
+# CHECK-NEXT:        "uri": "file:///clangd-test/main.cpp"
+# CHECK-NEXT:      }
+# CHECK-NEXT:    ],
+# CHECK-NEXT:    "data": "A6576FE083F2949A",
+# CHECK-NEXT:    "kind": 23,
+# CHECK-NEXT:    "name": "Child3",
+# CHECK-NEXT:    "range": {
+# CHECK-NEXT:      "end": {
+# CHECK-NEXT:        "character": 13,
+# CHECK-NEXT:        "line": 3
+# CHECK-NEXT:      },
+# CHECK-NEXT:      "start": {
+# CHECK-NEXT:        "character": 7,
+# CHECK-NEXT:        "line": 3
+# CHECK-NEXT:      }
+# CHECK-NEXT:    },
+# CHECK-NEXT:    "selectionRange": {
+# CHECK-NEXT:      "end": {
+# CHECK-NEXT:        "character": 13,
+# CHECK-NEXT:        "line": 3
+# CHECK-NEXT:      },
+# CHECK-NEXT:      "start": {
+# CHECK-NEXT:        "character": 7,
+# CHECK-NEXT:        "line": 3
+# CHECK-NEXT:      }
+# CHECK-NEXT:    },
+# CHECK-NEXT:    "uri": "file:///clangd-test/main.cpp"
+# CHECK-NEXT:  }
+---
+{"jsonrpc":"2.0","id":3,"method":"shutdown"}
 ---
 {"jsonrpc":"2.0","method":"exit"}
diff --git a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp
index be16646a72036..633a25fe3b442 100644
--- a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp
@@ -42,8 +42,17 @@ MATCHER_P(WithKind, Kind, "") { return arg.kind == Kind; }
 MATCHER_P(SelectionRangeIs, R, "") { return arg.selectionRange == R; }
 template <class... ParentMatchers>
 ::testing::Matcher<TypeHierarchyItem> Parents(ParentMatchers... ParentsM) {
-  return Field(&TypeHierarchyItem::parents, HasValue(ElementsAre(ParentsM...)));
+  return Field(&TypeHierarchyItem::parents,
+               HasValue(UnorderedElementsAre(ParentsM...)));
 }
+template <class... ChildMatchers>
+::testing::Matcher<TypeHierarchyItem> Children(ChildMatchers... ChildrenM) {
+  return Field(&TypeHierarchyItem::children,
+               HasValue(UnorderedElementsAre(ChildrenM...)));
+}
+// Note: "not resolved" is different from "resolved but empty"!
+MATCHER(ParentsNotResolved, "") { return !arg.parents; }
+MATCHER(ChildrenNotResolved, "") { return !arg.children; }
 
 TEST(FindRecordTypeAt, TypeOrVariable) {
   Annotations Source(R"cpp(
@@ -603,6 +612,41 @@ struct Child : Parent<T> {};
   EXPECT_THAT(collectSubtypes(Parent, Index.get()), ElementsAre(Child));
 }
 
+TEST(Subtypes, LazyResolution) {
+  Annotations Source(R"cpp(
+struct P^arent {};
+struct Child1 : Parent {};
+struct Child2a : Child1 {};
+struct Child2b : Child1 {};
+)cpp");
+
+  TestTU TU = TestTU::withCode(Source.code());
+  auto AST = TU.build();
+  auto Index = TU.index();
+
+  llvm::Optional<TypeHierarchyItem> Result = getTypeHierarchy(
+      AST, Source.point(), /*ResolveLevels=*/1,
+      TypeHierarchyDirection::Children, Index.get(), testPath(TU.Filename));
+  ASSERT_TRUE(bool(Result));
+  EXPECT_THAT(
+      *Result,
+      AllOf(WithName("Parent"), WithKind(SymbolKind::Struct), Parents(),
+            Children(AllOf(WithName("Child1"), WithKind(SymbolKind::Struct),
+                           ParentsNotResolved(), ChildrenNotResolved()))));
+
+  resolveTypeHierarchy((*Result->children)[0], /*ResolveLevels=*/1,
+                       TypeHierarchyDirection::Children, Index.get());
+
+  EXPECT_THAT(
+      (*Result->children)[0],
+      AllOf(WithName("Child1"), WithKind(SymbolKind::Struct),
+            ParentsNotResolved(),
+            Children(AllOf(WithName("Child2a"), WithKind(SymbolKind::Struct),
+                           ParentsNotResolved(), ChildrenNotResolved()),
+                     AllOf(WithName("Child2b"), WithKind(SymbolKind::Struct),
+                           ParentsNotResolved(), ChildrenNotResolved()))));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang

From d1fdadb22685b88d885833f4a21cca5df45fa303 Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Sat, 13 Jul 2019 03:24:54 +0000
Subject: [PATCH 024/451] [clangd] Mark type hierarchy as a supported feature
 in the docs

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64614

llvm-svn: 365987
---
 clang-tools-extra/docs/clangd/Features.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/docs/clangd/Features.rst b/clang-tools-extra/docs/clangd/Features.rst
index 3e6e745a691c8..87b6713c44ace 100644
--- a/clang-tools-extra/docs/clangd/Features.rst
+++ b/clang-tools-extra/docs/clangd/Features.rst
@@ -261,7 +261,7 @@ developed outside clangd or become clangd extensions to LSP.
 +-------------------------------------+------------+----------+
 | Call hierarchy                      | No         |   No     |
 +-------------------------------------+------------+----------+
-| Type hierarchy                      | No         |   No     |
+| Type hierarchy                      | No         |   Yes    |
 +-------------------------------------+------------+----------+
 | Organize Includes                   | No         |   No     |
 +-------------------------------------+------------+----------+

From 497bb44fc41f2e9c4d3c5ed8f525da01fa476979 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Sat, 13 Jul 2019 03:30:55 +0000
Subject: [PATCH 025/451] Make Python version setting actually effective

This needs to be outside the if to actually work. Also, this adjusts the
list of versions to match LLVM.

Patch by: Christian Biesinger

Differential revision: https://reviews.llvm.org/D64578

llvm-svn: 365988
---
 lldb/cmake/modules/LLDBStandalone.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 80075b91b6e40..803f6bda968b2 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -87,8 +87,8 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   include(CheckAtomic)
   include(LLVMDistributionSupport)
 
+  set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 2.7)
   if (PYTHON_EXECUTABLE STREQUAL "")
-    set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 3.4 3.3 3.2 3.1 3.0 2.7 2.6 2.5)
     include(FindPythonInterp)
     if( NOT PYTHONINTERP_FOUND )
       message(FATAL_ERROR

From 118ee5f2e06a9972bd9fd171444cc080c03b0b99 Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanaka@apple.com>
Date: Sat, 13 Jul 2019 03:59:55 +0000
Subject: [PATCH 026/451] Initialize the non-trivial C union bits I added to
 RecordDeclBitfields in r365985

These bits weren't being initialized in the RecordDecl's constructor,
which probably caused test/Modules/stress1.cpp to fail on a couple of
bots.

llvm-svn: 365989
---
 clang/lib/AST/Decl.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 21dd5425834a9..21cf9da18a8b2 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -4252,6 +4252,9 @@ RecordDecl::RecordDecl(Kind DK, TagKind TK, const ASTContext &C,
   setNonTrivialToPrimitiveDefaultInitialize(false);
   setNonTrivialToPrimitiveCopy(false);
   setNonTrivialToPrimitiveDestroy(false);
+  setHasNonTrivialToPrimitiveDefaultInitializeCUnion(false);
+  setHasNonTrivialToPrimitiveDestructCUnion(false);
+  setHasNonTrivialToPrimitiveCopyCUnion(false);
   setParamDestroyedInCallee(false);
   setArgPassingRestrictions(APK_CanPassInRegs);
 }

From 1a6053ebc61cb0b8146f5ca27b74859a9a91e0a3 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Sat, 13 Jul 2019 05:31:48 +0000
Subject: [PATCH 027/451] Revert "[COFF] Add null check in case of symbols
 defined in LTO blobs"

This reverts commit r365979: COFF/undefined-symbol-lto.test is failing.

llvm-svn: 365990
---
 lld/COFF/SymbolTable.cpp                      |  2 +-
 .../COFF/Inputs/undefined-symbol-lto-a.ll     | 82 -------------------
 .../COFF/Inputs/undefined-symbol-lto-b.ll     | 29 -------
 lld/test/COFF/undefined-symbol-lto.test       | 30 -------
 4 files changed, 1 insertion(+), 142 deletions(-)
 delete mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
 delete mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
 delete mode 100644 lld/test/COFF/undefined-symbol-lto.test

diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 2173c10c1ca56..280a9c28892c8 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -69,7 +69,7 @@ static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
 
   for (Symbol *s : sc->file->getSymbols()) {
     auto *d = dyn_cast_or_null<DefinedRegular>(s);
-    if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr ||
+    if (!d || d->getChunk() != sc || d->getValue() > addr ||
         (candidate && d->getValue() < candidate->getValue()))
       continue;
 
diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
deleted file mode 100644
index 6793ec718e806..0000000000000
--- a/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; ModuleID = 't.obj'
-source_filename = "t.cpp"
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc19.21.27702"
-
-%struct.Init = type { %struct.S }
-%struct.S = type { i32 (...)** }
-%rtti.CompleteObjectLocator = type { i32, i32, i32, i32, i32, i32 }
-%rtti.TypeDescriptor7 = type { i8**, i8*, [8 x i8] }
-%rtti.ClassHierarchyDescriptor = type { i32, i32, i32, i32 }
-%rtti.BaseClassDescriptor = type { i32, i32, i32, i32, i32, i32, i32 }
-
-$"??_SS@@6B@" = comdat largest
-
-$"??_R4S@@6B@" = comdat any
-
-$"??_R0?AUS@@@8" = comdat any
-
-$"??_R3S@@8" = comdat any
-
-$"??_R2S@@8" = comdat any
-
-$"??_R1A@?0A@EA@S@@8" = comdat any
-
-@"?d@@3UInit@@A" = dso_local local_unnamed_addr global %struct.Init zeroinitializer, align 8
-@anon.bcb2691509de99310dddb690fcdb4cdc.0 = private unnamed_addr constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i8*), i8* bitcast (void (%struct.S*)* @"?foo@S@@UEAAXXZ" to i8*)] }, comdat($"??_SS@@6B@"), !type !0
-@"??_R4S@@6B@" = linkonce_odr constant %rtti.CompleteObjectLocator { i32 1, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
-@"??_7type_info@@6B@" = external constant i8*
-@"??_R0?AUS@@@8" = linkonce_odr global %rtti.TypeDescriptor7 { i8** @"??_7type_info@@6B@", i8* null, [8 x i8] c".?AUS@@\00" }, comdat
-@__ImageBase = external dso_local constant i8
-@"??_R3S@@8" = linkonce_odr constant %rtti.ClassHierarchyDescriptor { i32 0, i32 0, i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint ([2 x i32]* @"??_R2S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
-@"??_R2S@@8" = linkonce_odr constant [2 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.BaseClassDescriptor* @"??_R1A@?0A@EA@S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0], comdat
-@"??_R1A@?0A@EA@S@@8" = linkonce_odr constant %rtti.BaseClassDescriptor { i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 0, i32 -1, i32 0, i32 64, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
-@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_t.cpp, i8* null }]
-
-@"??_SS@@6B@" = unnamed_addr alias i8*, getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @anon.bcb2691509de99310dddb690fcdb4cdc.0, i32 0, i32 0, i32 1)
-
-declare dso_local void @"?undefined_ref@@YAXXZ"() local_unnamed_addr #0
-
-declare dllimport void @"?foo@S@@UEAAXXZ"(%struct.S*) unnamed_addr #0
-
-; Function Attrs: nounwind sspstrong uwtable
-define internal void @_GLOBAL__sub_I_t.cpp() #1 {
-entry:
-  store i32 (...)** bitcast (i8** @"??_SS@@6B@" to i32 (...)**), i32 (...)*** getelementptr inbounds (%struct.Init, %struct.Init* @"?d@@3UInit@@A", i64 0, i32 0, i32 0), align 8
-  tail call void @"?undefined_ref@@YAXXZ"() #2
-  ret void
-}
-
-attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind }
-
-!llvm.linker.options = !{!1, !2}
-!llvm.module.flags = !{!3, !4, !5, !6}
-!llvm.ident = !{!7}
-
-!0 = !{i64 8, !"?AUS@@"}
-!1 = !{!"/DEFAULTLIB:libcmt.lib"}
-!2 = !{!"/DEFAULTLIB:oldnames.lib"}
-!3 = !{i32 1, !"wchar_size", i32 2}
-!4 = !{i32 7, !"PIC Level", i32 2}
-!5 = !{i32 1, !"ThinLTO", i32 0}
-!6 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
-!7 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"}
-
-^0 = module: (path: "t.obj", hash: (0, 0, 0, 0, 0))
-^1 = gv: (name: "__ImageBase") ; guid = 434928772013489304
-^2 = gv: (name: "??_R2S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^6)))) ; guid = 2160898732728284029
-^3 = gv: (name: "llvm.global_ctors", summaries: (variable: (module: ^0, flags: (linkage: appending, notEligibleToImport: 1, live: 1, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^14)))) ; guid = 2412314959268824392
-^4 = gv: (name: "?foo@S@@UEAAXXZ") ; guid = 6578172636330484861
-^5 = gv: (name: "??_SS@@6B@", summaries: (alias: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), aliasee: ^10))) ; guid = 8774897714842691026
-^6 = gv: (name: "??_R1A@?0A@EA@S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^11, ^1, ^8)))) ; guid = 9397802696236423453
-^7 = gv: (name: "?undefined_ref@@YAXXZ") ; guid = 9774674600202276560
-^8 = gv: (name: "??_R3S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^2)))) ; guid = 10685958509605791599
-^9 = gv: (name: "??_7type_info@@6B@") ; guid = 10826752452437539368
-^10 = gv: (name: "anon.bcb2691509de99310dddb690fcdb4cdc.0", summaries: (variable: (module: ^0, flags: (linkage: private, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), vTableFuncs: ((virtFunc: ^4, offset: 8)), refs: (^13, ^4)))) ; guid = 11510395461204283992
-^11 = gv: (name: "??_R0?AUS@@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^9)))) ; guid = 12346607659584231960
-^12 = gv: (name: "?d@@3UInit@@A", summaries: (variable: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 1)))) ; guid = 14563354643524156382
-^13 = gv: (name: "??_R4S@@6B@", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^13, ^11, ^1, ^8)))) ; guid = 14703528065171087394
-^14 = gv: (name: "_GLOBAL__sub_I_t.cpp", summaries: (function: (module: ^0, flags: (linkage: internal, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 3, calls: ((callee: ^7)), refs: (^12, ^5)))) ; guid = 15085897428757412588
-^15 = typeidCompatibleVTable: (name: "?AUS@@", summary: ((offset: 8, ^10))) ; guid = 13986515119763165370
diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
deleted file mode 100644
index ff73e7c6ba680..0000000000000
--- a/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; ModuleID = 'b.obj'
-source_filename = "b.cpp"
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc19.21.27702"
-
-%struct.S = type { i32 (...)** }
-
-; Function Attrs: norecurse nounwind readnone sspstrong uwtable
-define dso_local void @"?foo@S@@UEAAXXZ"(%struct.S* nocapture %this) unnamed_addr #0 align 2 {
-entry:
-  ret void
-}
-
-attributes #0 = { norecurse nounwind readnone sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.linker.options = !{!0, !1}
-!llvm.module.flags = !{!2, !3, !4, !5}
-!llvm.ident = !{!6}
-
-!0 = !{!"/DEFAULTLIB:libcmt.lib"}
-!1 = !{!"/DEFAULTLIB:oldnames.lib"}
-!2 = !{i32 1, !"wchar_size", i32 2}
-!3 = !{i32 7, !"PIC Level", i32 2}
-!4 = !{i32 1, !"ThinLTO", i32 0}
-!5 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
-!6 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"}
-
-^0 = module: (path: "b.obj", hash: (0, 0, 0, 0, 0))
-^1 = gv: (name: "?foo@S@@UEAAXXZ", summaries: (function: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0)))) ; guid = 6578172636330484861
diff --git a/lld/test/COFF/undefined-symbol-lto.test b/lld/test/COFF/undefined-symbol-lto.test
deleted file mode 100644
index 6911b121122a4..0000000000000
--- a/lld/test/COFF/undefined-symbol-lto.test
+++ /dev/null
@@ -1,30 +0,0 @@
-RUN: rm -rf %t && mkdir -p %t && cd %t
-RUN: llvm-as %S/Inputs/undefined-symbol-lto-a.ll -o t.obj
-RUN: llvm-as %S/Inputs/undefined-symbol-lto-b.ll -o b.obj
-RUN: llvm-lib b.obj -out:b.lib
-RUN: not lld-link t.obj b.lib -subsystem:console 2>&1 | FileCheck %s
-
-CHECK: undefined symbol: main
-CHECK: referenced by
-CHECK: undefined symbol: void __cdecl undefined_ref(void)
-CHECK: referenced by
-
-Originally reported as PR42536.
-
-a.ll corresponds to this C++:
-
-struct __declspec(dllimport) S {
-  virtual void foo();
-};
-void undefined_ref();
-struct Init {
-  Init() { undefined_ref(); }
-  S c;
-} d;
-
-b.ll is from this C++:
-
-struct S {
-  virtual void foo();
-};
-void S::foo() {}

From 1447b60eeb2b3026a0c96bef052843a71002d617 Mon Sep 17 00:00:00 2001
From: Michal Gorny <mgorny@gentoo.org>
Date: Sat, 13 Jul 2019 06:24:14 +0000
Subject: [PATCH 028/451] [lldb] [test] Un-XFAIL TestFormattersSBAPI on NetBSD

llvm-svn: 365991
---
 .../lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py b/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py
index dd12ac198cd35..8548506fdc463 100644
--- a/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py
+++ b/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py
@@ -22,7 +22,6 @@ def setUp(self):
         self.line = line_number('main.cpp', '// Set break point at this line.')
 
     @add_test_categories(['pyapi'])
-    @expectedFailureNetBSD
     def test_formatters_api(self):
         """Test Python APIs for working with formatters"""
         self.build()

From 21a92a8a559ba27907290bafd181e490101a4fcb Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Sat, 13 Jul 2019 06:27:35 +0000
Subject: [PATCH 029/451] This reverts commit
 632a36bfcfc8273c1861f04ff6758d863c47c784.

Some targets such as Python 2.7.16 still use __VERSION__ in
their builds. Without __VERSION__ defined, the source code
has syntax errors.

Reverting, as the removal will probably break many other things.

Noticed by Sterling Augustine

llvm-svn: 365992
---
 clang/docs/LanguageExtensions.rst       |  2 --
 clang/docs/ReleaseNotes.rst             | 10 ----------
 clang/lib/Basic/Version.cpp             |  2 ++
 clang/lib/Frontend/InitPreprocessor.cpp |  6 ++++++
 clang/test/Index/complete-exprs.c       |  2 ++
 clang/test/Preprocessor/init.c          |  4 ++++
 clang/utils/builtin-defines.c           |  1 +
 7 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 61152a251bdba..ecbf04c3c822a 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -324,8 +324,6 @@ option for a warning and returns true if that is a valid warning option.
   ...
   #endif
 
-.. _languageextensions-builtin-macros:
-
 Builtin Macros
 ==============
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7185030a92d7c..f0a35050dde08 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -56,11 +56,6 @@ Improvements to Clang's diagnostics
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
-- The ``__VERSION__`` macro has been removed.
-  Previously this macro was set to a string aiming to achieve compatibility with 
-  GCC 4.2.1, but that should no longer be necessary. To get Clang's version,
-  use the :ref:`clang namespaced version macros <languageextensions-builtin-macros>`.
-
 - ...
 
 
@@ -84,11 +79,6 @@ Modified Compiler Flags
 
 - ...
 
-Removed Compiler Options
-------------------------
-
-- ...
-
 New Pragmas in Clang
 --------------------
 
diff --git a/clang/lib/Basic/Version.cpp b/clang/lib/Basic/Version.cpp
index 5fd12762b6893..d6564582e7726 100644
--- a/clang/lib/Basic/Version.cpp
+++ b/clang/lib/Basic/Version.cpp
@@ -136,6 +136,8 @@ std::string getClangToolFullVersion(StringRef ToolName) {
 }
 
 std::string getClangFullCPPVersion() {
+  // The version string we report in __VERSION__ is just a compacted version of
+  // the one we report on the command line.
   std::string buf;
   llvm::raw_string_ostream OS(buf);
 #ifdef CLANG_VENDOR
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index a02c266c094a8..1741ba5e5203e 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -604,6 +604,12 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   // Support for #pragma redefine_extname (Sun compatibility)
   Builder.defineMacro("__PRAGMA_REDEFINE_EXTNAME", "1");
 
+  // As sad as it is, enough software depends on the __VERSION__ for version
+  // checks that it is necessary to report 4.2.1 (the base GCC version we claim
+  // compatibility with) first.
+  Builder.defineMacro("__VERSION__", "\"4.2.1 Compatible " +
+                      Twine(getClangFullCPPVersion()) + "\"");
+
   // Initialize language-specific preprocessor defines.
 
   // Standard conforming mode?
diff --git a/clang/test/Index/complete-exprs.c b/clang/test/Index/complete-exprs.c
index 50f5025f1512f..9beb16deef99b 100644
--- a/clang/test/Index/complete-exprs.c
+++ b/clang/test/Index/complete-exprs.c
@@ -27,6 +27,7 @@ void f5(float f) {
 // RUN: c-index-test -code-completion-at=%s:7:10 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC1 %s
 // RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_COMPLETION_CACHING=1 c-index-test -code-completion-at=%s:7:10 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC1 %s
 // CHECK-CC1: NotImplemented:{TypedText __PRETTY_FUNCTION__} (65)
+// CHECK-CC1: macro definition:{TypedText __VERSION__} (70)
 // CHECK-CC1: FunctionDecl:{ResultType int}{TypedText f}{LeftParen (}{Placeholder int}{RightParen )} (12) (unavailable)
 // CHECK-CC1-NOT: NotImplemented:{TypedText float} (65)
 // CHECK-CC1: ParmDecl:{ResultType int}{TypedText j} (8)
@@ -38,6 +39,7 @@ void f5(float f) {
 // RUN: c-index-test -code-completion-at=%s:7:18 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC1 %s
 // RUN: c-index-test -code-completion-at=%s:7:22 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC1 %s
 // RUN: c-index-test -code-completion-at=%s:7:2 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC2 %s
+// CHECK-CC2: macro definition:{TypedText __VERSION__} (70)
 // CHECK-CC2: FunctionDecl:{ResultType int}{TypedText f}{LeftParen (}{Placeholder int}{RightParen )} (50)
 // CHECK-CC2: NotImplemented:{TypedText float} (50)
 // CHECK-CC2: ParmDecl:{ResultType int}{TypedText j} (34)
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index 00a7c7b6bae95..8df3b4bd2ccf4 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -101,6 +101,7 @@
 // COMMON:#define __ORDER_PDP_ENDIAN__ 3412
 // COMMON:#define __STDC_HOSTED__ 1
 // COMMON:#define __STDC__ 1
+// COMMON:#define __VERSION__ {{.*}}
 // COMMON:#define __clang__ 1
 // COMMON:#define __clang_major__ {{[0-9]+}}
 // COMMON:#define __clang_minor__ {{[0-9]+}}
@@ -8168,6 +8169,7 @@
 // SPARC:#define __UINT_LEAST8_MAX__ 255
 // SPARC:#define __UINT_LEAST8_TYPE__ unsigned char
 // SPARC:#define __USER_LABEL_PREFIX__
+// SPARC:#define __VERSION__ "4.2.1 Compatible{{.*}}
 // SPARC:#define __WCHAR_MAX__ 2147483647
 // SPARC:#define __WCHAR_TYPE__ int
 // SPARC:#define __WCHAR_WIDTH__ 32
@@ -9039,6 +9041,7 @@
 // X86_64-CLOUDABI:#define __UINT_LEAST8_MAX__ 255
 // X86_64-CLOUDABI:#define __UINT_LEAST8_TYPE__ unsigned char
 // X86_64-CLOUDABI:#define __USER_LABEL_PREFIX__
+// X86_64-CLOUDABI:#define __VERSION__ "4.2.1 Compatible{{.*}}
 // X86_64-CLOUDABI:#define __WCHAR_MAX__ 2147483647
 // X86_64-CLOUDABI:#define __WCHAR_TYPE__ int
 // X86_64-CLOUDABI:#define __WCHAR_WIDTH__ 32
@@ -10040,6 +10043,7 @@
 // WEBASSEMBLY-NEXT:#define __UINT_LEAST8_MAX__ 255
 // WEBASSEMBLY-NEXT:#define __UINT_LEAST8_TYPE__ unsigned char
 // WEBASSEMBLY-NEXT:#define __USER_LABEL_PREFIX__
+// WEBASSEMBLY-NEXT:#define __VERSION__ "{{.*}}"
 // WEBASSEMBLY-NEXT:#define __WCHAR_MAX__ 2147483647
 // WEBASSEMBLY-NEXT:#define __WCHAR_TYPE__ int
 // WEBASSEMBLY-NOT:#define __WCHAR_UNSIGNED__
diff --git a/clang/utils/builtin-defines.c b/clang/utils/builtin-defines.c
index 2936d631e61a7..9bbe5be250269 100644
--- a/clang/utils/builtin-defines.c
+++ b/clang/utils/builtin-defines.c
@@ -49,6 +49,7 @@ RUN: done;
 #undef __INT8_TYPE__
 #undef __SSP__
 #undef __APPLE_CC__
+#undef __VERSION__
 #undef __clang__
 #undef __llvm__
 #undef __nocona

From 36fbd0da5fb7ac70146d2118165556d4af19fd8b Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sat, 13 Jul 2019 07:23:12 +0000
Subject: [PATCH 030/451] Simplify with llvm::is_contained. NFC

llvm-svn: 365993
---
 .../clang-tidy/bugprone/AssertSideEffectCheck.cpp          | 3 +--
 .../bugprone/ForwardingReferenceOverloadCheck.cpp          | 2 +-
 clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp | 7 ++-----
 clang-tools-extra/modularize/Modularize.cpp                | 2 +-
 4 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
index a28ef1138e539..4e3f76544dc8b 100644
--- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
@@ -108,8 +108,7 @@ void AssertSideEffectCheck::check(const MatchFinder::MatchResult &Result) {
     StringRef MacroName = Lexer::getImmediateMacroName(Loc, SM, LangOpts);
 
     // Check if this macro is an assert.
-    if (std::find(AssertMacros.begin(), AssertMacros.end(), MacroName) !=
-        AssertMacros.end()) {
+    if (llvm::is_contained(AssertMacros, MacroName)) {
       AssertMacroName = MacroName;
       break;
     }
diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp
index 57055ff2191e8..2773d38420be3 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp
@@ -105,7 +105,7 @@ void ForwardingReferenceOverloadCheck::check(
   // template as the function parameter of that type. (This implies that type
   // deduction will happen on the type.)
   const TemplateParameterList *Params = FuncTemplate->getTemplateParameters();
-  if (std::find(Params->begin(), Params->end(), TypeParmDecl) == Params->end())
+  if (!llvm::is_contained(*Params, TypeParmDecl))
     return;
 
   // Every parameter after the first must have a default value.
diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp
index 45e59c3ec51ab..d0a95d95ec4b4 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp
@@ -242,10 +242,8 @@ class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
           getOutermostMacroName(StartLoc, SM, Context.getLangOpts());
 
       // Check to see if the user wants to replace the macro being expanded.
-      if (std::find(NullMacros.begin(), NullMacros.end(), OutermostMacroName) ==
-          NullMacros.end()) {
+      if (!llvm::is_contained(NullMacros, OutermostMacroName))
         return skipSubTree();
-      }
 
       StartLoc = SM.getFileLoc(StartLoc);
       EndLoc = SM.getFileLoc(EndLoc);
@@ -327,8 +325,7 @@ class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
 
         StringRef Name =
             Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts());
-        return std::find(NullMacros.begin(), NullMacros.end(), Name) !=
-               NullMacros.end();
+        return llvm::is_contained(NullMacros, Name);
       }
 
       MacroLoc = SM.getExpansionRange(ArgLoc).getBegin();
diff --git a/clang-tools-extra/modularize/Modularize.cpp b/clang-tools-extra/modularize/Modularize.cpp
index 59fc5c351ff27..866356d055b62 100644
--- a/clang-tools-extra/modularize/Modularize.cpp
+++ b/clang-tools-extra/modularize/Modularize.cpp
@@ -369,7 +369,7 @@ getModularizeArgumentsAdjuster(DependencyMap &Dependencies) {
     // Ignore warnings.  (Insert after "clang_tool" at beginning.)
     NewArgs.insert(NewArgs.begin() + 1, "-w");
     // Since we are compiling .h files, assume C++ unless given a -x option.
-    if (std::find(NewArgs.begin(), NewArgs.end(), "-x") == NewArgs.end()) {
+    if (!llvm::is_contained(NewArgs, "-x")) {
       NewArgs.insert(NewArgs.begin() + 2, "-x");
       NewArgs.insert(NewArgs.begin() + 3, "c++");
     }

From 20d34eacf3f118d1e94165beaf3da24a5c1e8011 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@chromium.org>
Date: Sat, 13 Jul 2019 08:07:10 +0000
Subject: [PATCH 031/451] [CMake][Fuchsia] Define asan+noexcept multilib

Using noexcept multilib with -fno-exceptions can lead to significant
space savings when statically linking libc++abi because we don't need
all the unwinding and demangling code.

When compiling with ASan, we already get a lot of overhead from the
instrumentation itself; when statically linking libc++abi, that overhead
is even larger.

Having the noexcept variant for ASan can help significantly: we've seen
more than 50% size reduction in our system image, which offsets the cost
of having to build another multilib.

Differential Revision: https://reviews.llvm.org/D64140

llvm-svn: 365994
---
 clang/cmake/caches/Fuchsia-stage2.cmake                | 10 +++++++++-
 clang/lib/Driver/ToolChains/Fuchsia.cpp                |  5 +++++
 .../lib/aarch64-fuchsia/c++/asan+noexcept/libc++.so    |  0
 .../lib/x86_64-fuchsia/c++/asan+noexcept/libc++.so     |  0
 clang/test/Driver/fuchsia.cpp                          |  3 ++-
 5 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan+noexcept/libc++.so
 create mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan+noexcept/libc++.so

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index e93b6e4f07ad8..1f8a9e78763fa 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -153,13 +153,21 @@ if(FUCHSIA_SDK)
     set(RUNTIMES_${target}-unknown-fuchsia+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
     set(RUNTIMES_${target}-unknown-fuchsia+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
 
+    set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LLVM_BUILD_COMPILER_RT OFF CACHE BOOL "")
+    set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LLVM_USE_SANITIZER "Address" CACHE STRING "")
+    set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
+    set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
+    set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+    set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+
     # Use .build-id link.
     list(APPEND RUNTIME_BUILD_ID_LINK "${target}-unknown-fuchsia")
   endforeach()
 
-  set(LLVM_RUNTIME_MULTILIBS "asan;noexcept" CACHE STRING "")
+  set(LLVM_RUNTIME_MULTILIBS "asan;noexcept;asan+noexcept" CACHE STRING "")
   set(LLVM_RUNTIME_MULTILIB_asan_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "")
   set(LLVM_RUNTIME_MULTILIB_noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "")
+  set(LLVM_RUNTIME_MULTILIB_asan+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "")
 endif()
 
 set(LLVM_BUILTIN_TARGETS "${BUILTIN_TARGETS}" CACHE STRING "")
diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp
index 2344a69adb962..1f5ec9ebb16d5 100644
--- a/clang/lib/Driver/ToolChains/Fuchsia.cpp
+++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp
@@ -192,6 +192,11 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple,
   // ASan has higher priority because we always want the instrumentated version.
   Multilibs.push_back(Multilib("asan", {}, {}, 2)
                           .flag("+fsanitize=address"));
+  // Use the asan+noexcept variant with ASan and -fno-exceptions.
+  Multilibs.push_back(Multilib("asan+noexcept", {}, {}, 3)
+                          .flag("+fsanitize=address")
+                          .flag("-fexceptions")
+                          .flag("+fno-exceptions"));
   Multilibs.FilterOut([&](const Multilib &M) {
     std::vector<std::string> RD = FilePaths(M);
     return std::all_of(RD.begin(), RD.end(), [&](std::string P) {
diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan+noexcept/libc++.so b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan+noexcept/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan+noexcept/libc++.so b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan+noexcept/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/fuchsia.cpp b/clang/test/Driver/fuchsia.cpp
index 823ded4b91544..a5297e76964e4 100644
--- a/clang/test/Driver/fuchsia.cpp
+++ b/clang/test/Driver/fuchsia.cpp
@@ -70,8 +70,9 @@
 // RUN:     -ccc-install-dir %S/Inputs/basic_fuchsia_tree/bin \
 // RUN:     -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:     -fuse-ld=lld 2>&1\
-// RUN:     | FileCheck %s -check-prefixes=CHECK-MULTILIB-X86,CHECK-MULTILIB-ASAN-X86
+// RUN:     | FileCheck %s -check-prefixes=CHECK-MULTILIB-X86,CHECK-MULTILIB-ASAN-NOEXCEPT-X86
 // CHECK-MULTILIB-X86: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-MULTILIB-ASAN-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++{{/|\\\\}}asan"
 // CHECK-MULTILIB-NOEXCEPT-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++{{/|\\\\}}noexcept"
+// CHECK-MULTILIB-ASAN-NOEXCEPT-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++{{/|\\\\}}asan+noexcept"
 // CHECK-MULTILIB-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++"

From f1d865398b1cf5c082486fe51b52e0b41986640b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 13 Jul 2019 08:08:43 +0000
Subject: [PATCH 032/451] Fix -Wdocumentation warning. NFCI.

llvm-svn: 365995
---
 clang/include/clang/DirectoryWatcher/DirectoryWatcher.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h b/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h
index 0bf966bb832ab..e74443e0bc81c 100644
--- a/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h
+++ b/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h
@@ -98,8 +98,7 @@ class DirectoryWatcher {
         : Kind(Kind), Filename(Filename) {}
   };
 
-  /// Returns nullptr if \param Path doesn't exist.
-  /// Returns nullptr if \param Path isn't a directory.
+  /// Returns nullptr if \param Path doesn't exist or isn't a directory.
   /// Returns nullptr if OS kernel API told us we can't start watching. In such
   /// case it's unclear whether just retrying has any chance to succeeed.
   static std::unique_ptr<DirectoryWatcher>

From 16ac7a5a27c7c3a668c67c776f9230de409bf004 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sat, 13 Jul 2019 09:23:35 +0000
Subject: [PATCH 033/451] [Object] isNotObjectErrorInvalidFileType: fix
 use-after-move

llvm-svn: 365996
---
 llvm/lib/Object/Error.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Object/Error.cpp b/llvm/lib/Object/Error.cpp
index f2a009000c58d..ab10d23036a26 100644
--- a/llvm/lib/Object/Error.cpp
+++ b/llvm/lib/Object/Error.cpp
@@ -91,5 +91,5 @@ llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) {
             return Error(std::move(M));
           }))
     return Err2;
-  return Err;
+  return Error::success();
 }

From 327db23b6642499fab917014a4c9934c1649e120 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sat, 13 Jul 2019 09:28:33 +0000
Subject: [PATCH 034/451] [Object] isNotObjectErrorInvalidFileType: simplify

llvm-svn: 365997
---
 llvm/lib/Object/Error.cpp | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Object/Error.cpp b/llvm/lib/Object/Error.cpp
index ab10d23036a26..010c5b42dac25 100644
--- a/llvm/lib/Object/Error.cpp
+++ b/llvm/lib/Object/Error.cpp
@@ -78,18 +78,15 @@ const std::error_category &object::object_category() {
 }
 
 llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) {
-  if (auto Err2 =
-          handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error {
-            // Try to handle 'M'. If successful, return a success value from
-            // the handler.
-            if (M->convertToErrorCode() == object_error::invalid_file_type)
-              return Error::success();
+  return handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error {
+    // Try to handle 'M'. If successful, return a success value from
+    // the handler.
+    if (M->convertToErrorCode() == object_error::invalid_file_type)
+      return Error::success();
 
-            // We failed to handle 'M' - return it from the handler.
-            // This value will be passed back from catchErrors and
-            // wind up in Err2, where it will be returned from this function.
-            return Error(std::move(M));
-          }))
-    return Err2;
-  return Error::success();
+    // We failed to handle 'M' - return it from the handler.
+    // This value will be passed back from catchErrors and
+    // wind up in Err2, where it will be returned from this function.
+    return Error(std::move(M));
+  });
 }

From 2097f75eabb94c7eafcfba9cbfd6b60f08a4ded6 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 13 Jul 2019 12:04:52 +0000
Subject: [PATCH 035/451] [x86] simplify cmov with same true/false operands

llvm-svn: 365998
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++++
 llvm/test/CodeGen/X86/combine-sbb.ll    | 3 +--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6b152fe9d7ac1..e0bcf70248948 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36783,6 +36783,10 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
   X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
   SDValue Cond = N->getOperand(3);
 
+  // cmov X, X, ?, ? --> X
+  if (TrueOp == FalseOp)
+    return TrueOp;
+
   // Try to simplify the EFLAGS and condition code operands.
   // We can't always do this as FCMOV only supports a subset of X86 cond.
   if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
index f9ac10755aaf0..9e68ab4beb16b 100644
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -291,9 +291,8 @@ define i32 @PR40483_sub5(i32*, i32) {
 ;
 ; X64-LABEL: PR40483_sub5:
 ; X64:       # %bb.0:
-; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    subl %esi, (%rdi)
-; X64-NEXT:    cmovael %eax, %eax
+; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    retq
   %3 = load i32, i32* %0, align 8
   %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)

From 0f6148df23edcd3081f5e761de19edd4f823f16d Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 13 Jul 2019 12:54:48 +0000
Subject: [PATCH 036/451] [InstCombine] add tests for umin/umax via usub.sat;
 NFC

llvm-svn: 365999
---
 .../InstCombine/saturating-add-sub.ll         | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index 364c80d205f2d..56e10626104cf 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1614,3 +1614,75 @@ define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) {
   %r = add i32 %s, -42
   ret i32 %r
 }
+
+define i8 @umax(i8 %a, i8 %b) {
+; CHECK-LABEL: @umax(
+; CHECK-NEXT:    [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[USUB]], [[B]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b)
+  %r = add i8 %usub, %b
+  ret i8 %r
+}
+
+define <2 x i8> @umax_vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @umax_vec(
+; CHECK-NEXT:    [[USUB:%.*]] = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[USUB]], [[B]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %usub = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> %b)
+  %r = add <2 x i8> %usub, %b
+  ret <2 x i8> %r
+}
+
+define i8 @umax_extra_use(i8 %a, i8 %b) {
+; CHECK-LABEL: @umax_extra_use(
+; CHECK-NEXT:    [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]])
+; CHECK-NEXT:    call void @use(i8 [[USUB]])
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[USUB]], [[B]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b)
+  call void @use(i8 %usub)
+  %r = add i8 %usub, %b
+  ret i8 %r
+}
+
+define i8 @umin(i8 %a, i8 %b) {
+; CHECK-LABEL: @umin(
+; CHECK-NEXT:    [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[A]], [[USUB]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b)
+  %r = sub i8 %a, %usub
+  ret i8 %r
+}
+
+define <2 x i8> @umin_vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @umin_vec(
+; CHECK-NEXT:    [[USUB:%.*]] = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[A]], [[USUB]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %usub = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> %b)
+  %r = sub <2 x i8> %a, %usub
+  ret <2 x i8> %r
+}
+
+define i8 @umin_extra_use(i8 %a, i8 %b) {
+; CHECK-LABEL: @umin_extra_use(
+; CHECK-NEXT:    [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]])
+; CHECK-NEXT:    call void @use(i8 [[USUB]])
+; CHECK-NEXT:    [[R:%.*]] = sub i8 [[A]], [[USUB]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b)
+  call void @use(i8 %usub)
+  %r = sub i8 %a, %usub
+  ret i8 %r
+}
+
+declare void @use(i8)

From 22cc1030f6a9afd14cc48ec0b935ebe8678c0c2e Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 13 Jul 2019 13:16:46 +0000
Subject: [PATCH 037/451] Revert "[InstCombine] add tests for umin/umax via
 usub.sat; NFC"

This reverts commit rL365999 / 0f6148df23edcd3081f5e761de19edd4f823f16d.
The tests already exist in this file, and the hoped-for transform
(mentioned in D62871) is invalid because of undef as discussed in
D63060.

llvm-svn: 366000
---
 .../InstCombine/saturating-add-sub.ll         | 72 -------------------
 1 file changed, 72 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index 56e10626104cf..364c80d205f2d 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -1614,75 +1614,3 @@ define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) {
   %r = add i32 %s, -42
   ret i32 %r
 }
-
-define i8 @umax(i8 %a, i8 %b) {
-; CHECK-LABEL: @umax(
-; CHECK-NEXT:    [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = add i8 [[USUB]], [[B]]
-; CHECK-NEXT:    ret i8 [[R]]
-;
-  %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b)
-  %r = add i8 %usub, %b
-  ret i8 %r
-}
-
-define <2 x i8> @umax_vec(<2 x i8> %a, <2 x i8> %b) {
-; CHECK-LABEL: @umax_vec(
-; CHECK-NEXT:    [[USUB:%.*]] = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[USUB]], [[B]]
-; CHECK-NEXT:    ret <2 x i8> [[R]]
-;
-  %usub = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> %b)
-  %r = add <2 x i8> %usub, %b
-  ret <2 x i8> %r
-}
-
-define i8 @umax_extra_use(i8 %a, i8 %b) {
-; CHECK-LABEL: @umax_extra_use(
-; CHECK-NEXT:    [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]])
-; CHECK-NEXT:    call void @use(i8 [[USUB]])
-; CHECK-NEXT:    [[R:%.*]] = add i8 [[USUB]], [[B]]
-; CHECK-NEXT:    ret i8 [[R]]
-;
-  %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b)
-  call void @use(i8 %usub)
-  %r = add i8 %usub, %b
-  ret i8 %r
-}
-
-define i8 @umin(i8 %a, i8 %b) {
-; CHECK-LABEL: @umin(
-; CHECK-NEXT:    [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[A]], [[USUB]]
-; CHECK-NEXT:    ret i8 [[R]]
-;
-  %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b)
-  %r = sub i8 %a, %usub
-  ret i8 %r
-}
-
-define <2 x i8> @umin_vec(<2 x i8> %a, <2 x i8> %b) {
-; CHECK-LABEL: @umin_vec(
-; CHECK-NEXT:    [[USUB:%.*]] = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[A]], [[USUB]]
-; CHECK-NEXT:    ret <2 x i8> [[R]]
-;
-  %usub = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> %b)
-  %r = sub <2 x i8> %a, %usub
-  ret <2 x i8> %r
-}
-
-define i8 @umin_extra_use(i8 %a, i8 %b) {
-; CHECK-LABEL: @umin_extra_use(
-; CHECK-NEXT:    [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]])
-; CHECK-NEXT:    call void @use(i8 [[USUB]])
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[A]], [[USUB]]
-; CHECK-NEXT:    ret i8 [[R]]
-;
-  %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b)
-  call void @use(i8 %usub)
-  %r = sub i8 %a, %usub
-  ret i8 %r
-}
-
-declare void @use(i8)

From 2a7f5204602938ae89b0860e9412603d1951d945 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomasp@graphcore.ai>
Date: Sat, 13 Jul 2019 13:24:30 +0000
Subject: [PATCH 038/451] FileCheck [7/12]: Arbitrary long numeric expressions

Summary:
This patch is part of a patch series to add support for FileCheck
numeric expressions. This specific patch extends numeric expressions to
support an arbitrary number of operands, either variables or literals.

Copyright:
    - Linaro (changes up to diff 183612 of revision D55940)
    - GraphCore (changes in later versions of revision D55940 and
                 in new revision created off D55940)

Reviewers: jhenderson, chandlerc, jdenny, probinson, grimar, arichardson, rnk

Subscribers: hiraditya, llvm-commits, probinson, dblaikie, grimar, arichardson, tra, rnk, kristina, hfinkel, rogfer01, JonChesterfield

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60387

llvm-svn: 366001
---
 llvm/docs/CommandGuide/FileCheck.rst       |  25 +-
 llvm/include/llvm/Support/FileCheck.h      | 208 ++++++++++------
 llvm/lib/Support/FileCheck.cpp             | 208 +++++++++-------
 llvm/test/FileCheck/line-count.txt         |   2 +-
 llvm/test/FileCheck/numeric-expression.txt |  20 +-
 llvm/test/FileCheck/var-scope.txt          |   4 +-
 llvm/unittests/Support/FileCheckTest.cpp   | 273 ++++++++++++---------
 7 files changed, 446 insertions(+), 294 deletions(-)

diff --git a/llvm/docs/CommandGuide/FileCheck.rst b/llvm/docs/CommandGuide/FileCheck.rst
index a424606d4ce0a..0aa2d89fbcf08 100644
--- a/llvm/docs/CommandGuide/FileCheck.rst
+++ b/llvm/docs/CommandGuide/FileCheck.rst
@@ -107,10 +107,12 @@ and from the command line.
   Sets a filecheck pattern variable ``VAR`` with value ``VALUE`` that can be
   used in ``CHECK:`` lines.
 
-.. option:: -D#<NUMVAR>=<VALUE>
+.. option:: -D#<NUMVAR>=<VALUE EXPRESSION>
 
-  Sets a filecheck numeric variable ``NUMVAR`` to ``<VALUE>`` that can be used
-  in ``CHECK:`` lines.
+  Sets a filecheck numeric variable ``NUMVAR`` to the result of evaluating
+  ``<VALUE EXPRESSION>`` that can be used in ``CHECK:`` lines. See section
+  ``FileCheck Numeric Variables and Expressions`` for details on the format
+  and meaning of ``<VALUE EXPRESSION>``.
 
 .. option:: -version
 
@@ -590,18 +592,15 @@ For example:
 
 would match ``mov r5, 42`` and set ``REG`` to the value ``5``.
 
-The syntax of a numeric substitution is ``[[#<NUMVAR><op><offset>]]`` where:
+The syntax of a numeric substitution is ``[[#<expr>]]`` where ``<expr>`` is an
+expression. An expression is recursively defined as:
 
-* ``<NUMVAR>`` is the name of a defined numeric variable.
+* a numeric operand, or
+* an expression followed by an operator and a numeric operand.
 
-* ``<op>`` is an optional operation to perform on the value of ``<NUMVAR>``.
-  Currently supported operations are ``+`` and ``-``.
-
-* ``<offset>`` is the immediate value that constitutes the second operand of
-  the operation ``<op>``. It must be present if ``<op>`` is present, absent
-  otherwise.
-
-Spaces are accepted before, after and between any of these elements.
+A numeric operand is a previously defined numeric variable, or an integer
+literal. The supported operators are ``+`` and ``-``. Spaces are accepted
+before, after and between any of these elements.
 
 For example:
 
diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/Support/FileCheck.h
index b3a8433b54e65..caff50b0ca466 100644
--- a/llvm/include/llvm/Support/FileCheck.h
+++ b/llvm/include/llvm/Support/FileCheck.h
@@ -40,6 +40,54 @@ struct FileCheckRequest {
 // Numeric substitution handling code.
 //===----------------------------------------------------------------------===//
 
+/// Base class representing the AST of a given expression.
+class FileCheckExpressionAST {
+public:
+  virtual ~FileCheckExpressionAST() = default;
+
+  /// Evaluates and \returns the value of the expression represented by this
+  /// AST or an error if evaluation fails.
+  virtual Expected<uint64_t> eval() const = 0;
+};
+
+/// Class representing an unsigned literal in the AST of an expression.
+class FileCheckExpressionLiteral : public FileCheckExpressionAST {
+private:
+  /// Actual value of the literal.
+  uint64_t Value;
+
+public:
+  /// Constructs a literal with the specified value.
+  FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {}
+
+  /// \returns the literal's value.
+  Expected<uint64_t> eval() const { return Value; }
+};
+
+/// Class to represent an undefined variable error, which quotes that
+/// variable's name when printed.
+class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
+private:
+  StringRef VarName;
+
+public:
+  static char ID;
+
+  FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
+
+  StringRef getVarName() const { return VarName; }
+
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+
+  /// Print name of variable associated with this error.
+  void log(raw_ostream &OS) const override {
+    OS << "\"";
+    OS.write_escaped(VarName) << "\"";
+  }
+};
+
 /// Class representing a numeric variable and its associated current value.
 class FileCheckNumericVariable {
 private:
@@ -81,56 +129,53 @@ class FileCheckNumericVariable {
   size_t getDefLineNumber() { return DefLineNumber; }
 };
 
-/// Type of functions evaluating a given binary operation.
-using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
-
-/// Class to represent an undefined variable error which prints that variable's
-/// name between quotes when printed.
-class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
+/// Class representing the use of a numeric variable in the AST of an
+/// expression.
+class FileCheckNumericVariableUse : public FileCheckExpressionAST {
 private:
-  StringRef VarName;
-
-public:
-  static char ID;
-
-  FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
+  /// Name of the numeric variable.
+  StringRef Name;
 
-  StringRef getVarName() const { return VarName; }
+  /// Pointer to the class instance for the variable this use is about.
+  FileCheckNumericVariable *NumericVariable;
 
-  std::error_code convertToErrorCode() const override {
-    return inconvertibleErrorCode();
-  }
+public:
+  FileCheckNumericVariableUse(StringRef Name,
+                              FileCheckNumericVariable *NumericVariable)
+      : Name(Name), NumericVariable(NumericVariable) {}
 
-  /// Print name of variable associated with this error.
-  void log(raw_ostream &OS) const override {
-    OS << "\"";
-    OS.write_escaped(VarName) << "\"";
-  }
+  /// \returns the value of the variable referenced by this instance.
+  Expected<uint64_t> eval() const;
 };
 
-/// Class representing an expression consisting of either a single numeric
-/// variable or a binary operation between a numeric variable and an
-/// immediate.
-class FileCheckExpression {
+/// Type of functions evaluating a given binary operation.
+using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
+
+/// Class representing a single binary operation in the AST of an expression.
+class FileCheckASTBinop : public FileCheckExpressionAST {
 private:
   /// Left operand.
-  FileCheckNumericVariable *LeftOp;
+  std::unique_ptr<FileCheckExpressionAST> LeftOperand;
 
   /// Right operand.
-  uint64_t RightOp;
+  std::unique_ptr<FileCheckExpressionAST> RightOperand;
 
   /// Pointer to function that can evaluate this binary operation.
   binop_eval_t EvalBinop;
 
 public:
-  FileCheckExpression(binop_eval_t EvalBinop,
-                      FileCheckNumericVariable *OperandLeft,
-                      uint64_t OperandRight)
-      : LeftOp(OperandLeft), RightOp(OperandRight), EvalBinop(EvalBinop) {}
-
-  /// Evaluates the value of this expression, using EvalBinop to perform the
-  /// binary operation it consists of. \returns an error if the numeric
-  /// variable used is undefined, or the expression value otherwise.
+  FileCheckASTBinop(binop_eval_t EvalBinop,
+                    std::unique_ptr<FileCheckExpressionAST> LeftOp,
+                    std::unique_ptr<FileCheckExpressionAST> RightOp)
+      : EvalBinop(EvalBinop) {
+    LeftOperand = std::move(LeftOp);
+    RightOperand = std::move(RightOp);
+  }
+
+  /// Evaluates the value of the binary operation represented by this AST,
+  /// using EvalBinop on the result of recursively evaluating the operands.
+  /// \returns the expression value or an error if an undefined numeric
+  /// variable is used in one of the operands.
   Expected<uint64_t> eval() const;
 };
 
@@ -187,15 +232,15 @@ class FileCheckNumericSubstitution : public FileCheckSubstitution {
 private:
   /// Pointer to the class representing the expression whose value is to be
   /// substituted.
-  FileCheckExpression *Expression;
+  std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
 
 public:
-  FileCheckNumericSubstitution(FileCheckPatternContext *Context,
-                               StringRef ExpressionStr,
-                               FileCheckExpression *Expression,
+  FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
+                               std::unique_ptr<FileCheckExpressionAST> ExprAST,
                                size_t InsertIdx)
-      : FileCheckSubstitution(Context, ExpressionStr, InsertIdx),
-        Expression(Expression) {}
+      : FileCheckSubstitution(Context, Expr, InsertIdx) {
+    ExpressionAST = std::move(ExprAST);
+  }
 
   /// \returns a string containing the result of evaluating the expression in
   /// this substitution, or an error if evaluation failed.
@@ -278,10 +323,6 @@ class FileCheckPatternContext {
   /// easily updating its value.
   FileCheckNumericVariable *LineVariable = nullptr;
 
-  /// Vector holding pointers to all parsed expressions. Used to automatically
-  /// free the expressions once they are guaranteed to no longer be used.
-  std::vector<std::unique_ptr<FileCheckExpression>> Expressions;
-
   /// Vector holding pointers to all parsed numeric variables. Used to
   /// automatically free them once they are guaranteed to no longer be used.
   std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables;
@@ -313,12 +354,6 @@ class FileCheckPatternContext {
   void clearLocalVars();
 
 private:
-  /// Makes a new expression instance and registers it for destruction when
-  /// the context is destroyed.
-  FileCheckExpression *makeExpression(binop_eval_t EvalBinop,
-                                      FileCheckNumericVariable *OperandLeft,
-                                      uint64_t OperandRight);
-
   /// Makes a new numeric variable and registers it for destruction when the
   /// context is destroyed.
   template <class... Types>
@@ -333,7 +368,8 @@ class FileCheckPatternContext {
   /// the context is destroyed.
   FileCheckSubstitution *
   makeNumericSubstitution(StringRef ExpressionStr,
-                          FileCheckExpression *Expression, size_t InsertIdx);
+                          std::unique_ptr<FileCheckExpressionAST> ExpressionAST,
+                          size_t InsertIdx);
 };
 
 /// Class to represent an error holding a diagnostic with location information
@@ -458,13 +494,20 @@ class FileCheckPattern {
 
   /// \returns whether \p C is a valid first character for a variable name.
   static bool isValidVarNameStart(char C);
+
+  /// Parsing information about a variable.
+  struct VariableProperties {
+    StringRef Name;
+    bool IsPseudo;
+  };
+
   /// Parses the string at the start of \p Str for a variable name. \returns
-  /// an error holding a diagnostic against \p SM if parsing fail, or the
-  /// name of the variable otherwise. In the latter case, sets \p IsPseudo to
-  /// indicate if it is a pseudo variable and strips \p Str from the variable
-  /// name.
-  static Expected<StringRef> parseVariable(StringRef &Str, bool &IsPseudo,
-                                           const SourceMgr &SM);
+  /// a VariableProperties structure holding the variable name and whether it
+  /// is the name of a pseudo variable, or an error holding a diagnostic
+  /// against \p SM if parsing fail. If parsing was successful, also strips
+  /// \p Str from the variable name.
+  static Expected<VariableProperties> parseVariable(StringRef &Str,
+                                                    const SourceMgr &SM);
   /// Parses \p Expr for the name of a numeric variable to be defined at line
   /// \p LineNumber. \returns a pointer to the class instance representing that
   /// variable, creating it if needed, or an error holding a diagnostic against
@@ -473,16 +516,19 @@ class FileCheckPattern {
   parseNumericVariableDefinition(StringRef &Expr,
                                  FileCheckPatternContext *Context,
                                  size_t LineNumber, const SourceMgr &SM);
-  /// Parses \p Expr for a numeric substitution block. \returns the class
-  /// representing the AST of the expression whose value must be substituted,
-  /// or an error holding a diagnostic against \p SM if parsing fails. If
-  /// substitution was successful, sets \p DefinedNumericVariable to point to
-  /// the class representing the numeric variable defined in this numeric
+  /// Parses \p Expr for a numeric substitution block. Parameter
+  /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
+  /// expression. \returns a pointer to the class instance representing the AST
+  /// of the expression whose value must be substituted, or an error holding a
+  /// diagnostic against \p SM if parsing fails. If substitution was
+  /// successful, sets \p DefinedNumericVariable to point to the class
+  /// representing the numeric variable being defined in this numeric
   /// substitution block, or None if this block does not define any variable.
-  Expected<FileCheckExpression *> parseNumericSubstitutionBlock(
+  Expected<std::unique_ptr<FileCheckExpressionAST>>
+  parseNumericSubstitutionBlock(
       StringRef Expr,
       Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
-      const SourceMgr &SM) const;
+      bool IsLegacyLineExpr, const SourceMgr &SM) const;
   /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern
   /// instance accordingly.
   ///
@@ -507,7 +553,7 @@ class FileCheckPattern {
   Expected<size_t> match(StringRef Buffer, size_t &MatchLen,
                          const SourceMgr &SM) const;
   /// Prints the value of successful substitutions or the name of the undefined
-  /// string or numeric variable preventing a successful substitution.
+  /// string or numeric variables preventing a successful substitution.
   void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
                           SMRange MatchRange = None) const;
   void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
@@ -536,16 +582,28 @@ class FileCheckPattern {
   /// was not found.
   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
 
-  /// Parses \p Expr for the use of a numeric variable. \returns the pointer to
-  /// the class instance representing that variable if successful, or an error
+  /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use.
+  /// \returns the pointer to the class instance representing that variable if
+  /// successful, or an error holding a diagnostic against \p SM otherwise.
+  Expected<std::unique_ptr<FileCheckNumericVariableUse>>
+  parseNumericVariableUse(StringRef Name, bool IsPseudo,
+                          const SourceMgr &SM) const;
+  enum class AllowedOperand { LineVar, Literal, Any };
+  /// Parses \p Expr for use of a numeric operand. Accepts both literal values
+  /// and numeric variables, depending on the value of \p AO. \returns the
+  /// class representing that operand in the AST of the expression or an error
   /// holding a diagnostic against \p SM otherwise.
-  Expected<FileCheckNumericVariable *>
-  parseNumericVariableUse(StringRef &Expr, const SourceMgr &SM) const;
-  /// Parses \p Expr for a binary operation.
-  /// \returns the class representing the binary operation of the expression,
-  /// or an error holding a diagnostic against \p SM otherwise.
-  Expected<FileCheckExpression *> parseBinop(StringRef &Expr,
-                                             const SourceMgr &SM) const;
+  Expected<std::unique_ptr<FileCheckExpressionAST>>
+  parseNumericOperand(StringRef &Expr, AllowedOperand AO,
+                      const SourceMgr &SM) const;
+  /// Parses \p Expr for a binary operation. The left operand of this binary
+  /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether
+  /// we are parsing a legacy @LINE expression. \returns the class representing
+  /// the binary operation in the AST of the expression, or an error holding a
+  /// diagnostic against \p SM otherwise.
+  Expected<std::unique_ptr<FileCheckExpressionAST>>
+  parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
+             bool IsLegacyLineExpr, const SourceMgr &SM) const;
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp
index 5ec126f934e65..9fb4d798849d7 100644
--- a/llvm/lib/Support/FileCheck.cpp
+++ b/llvm/lib/Support/FileCheck.cpp
@@ -35,17 +35,33 @@ void FileCheckNumericVariable::clearValue() {
   Value = None;
 }
 
-Expected<uint64_t> FileCheckExpression::eval() const {
-  assert(LeftOp && "Evaluating an empty expression");
-  Optional<uint64_t> LeftOpValue = LeftOp->getValue();
-  // Variable is undefined.
-  if (!LeftOpValue)
-    return make_error<FileCheckUndefVarError>(LeftOp->getName());
-  return EvalBinop(*LeftOpValue, RightOp);
+Expected<uint64_t> FileCheckNumericVariableUse::eval() const {
+  Optional<uint64_t> Value = NumericVariable->getValue();
+  if (Value)
+    return *Value;
+  return make_error<FileCheckUndefVarError>(Name);
+}
+
+Expected<uint64_t> FileCheckASTBinop::eval() const {
+  Expected<uint64_t> LeftOp = LeftOperand->eval();
+  Expected<uint64_t> RightOp = RightOperand->eval();
+
+  // Bubble up any error (e.g. undefined variables) in the recursive
+  // evaluation.
+  if (!LeftOp || !RightOp) {
+    Error Err = Error::success();
+    if (!LeftOp)
+      Err = joinErrors(std::move(Err), LeftOp.takeError());
+    if (!RightOp)
+      Err = joinErrors(std::move(Err), RightOp.takeError());
+    return std::move(Err);
+  }
+
+  return EvalBinop(*LeftOp, *RightOp);
 }
 
 Expected<std::string> FileCheckNumericSubstitution::getResult() const {
-  Expected<uint64_t> EvaluatedValue = Expression->eval();
+  Expected<uint64_t> EvaluatedValue = ExpressionAST->eval();
   if (!EvaluatedValue)
     return EvaluatedValue.takeError();
   return utostr(*EvaluatedValue);
@@ -63,15 +79,14 @@ bool FileCheckPattern::isValidVarNameStart(char C) {
   return C == '_' || isalpha(C);
 }
 
-Expected<StringRef> FileCheckPattern::parseVariable(StringRef &Str,
-                                                    bool &IsPseudo,
-                                                    const SourceMgr &SM) {
+Expected<FileCheckPattern::VariableProperties>
+FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
   if (Str.empty())
     return FileCheckErrorDiagnostic::get(SM, Str, "empty variable name");
 
   bool ParsedOneChar = false;
   unsigned I = 0;
-  IsPseudo = Str[0] == '@';
+  bool IsPseudo = Str[0] == '@';
 
   // Global vars start with '$'.
   if (Str[0] == '$' || IsPseudo)
@@ -89,7 +104,7 @@ Expected<StringRef> FileCheckPattern::parseVariable(StringRef &Str,
 
   StringRef Name = Str.take_front(I);
   Str = Str.substr(I);
-  return Name;
+  return VariableProperties {Name, IsPseudo};
 }
 
 // StringRef holding all characters considered as horizontal whitespaces by
@@ -111,13 +126,12 @@ Expected<FileCheckNumericVariable *>
 FileCheckPattern::parseNumericVariableDefinition(
     StringRef &Expr, FileCheckPatternContext *Context, size_t LineNumber,
     const SourceMgr &SM) {
-  bool IsPseudo;
-  Expected<StringRef> ParseVarResult = parseVariable(Expr, IsPseudo, SM);
+  Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
   if (!ParseVarResult)
     return ParseVarResult.takeError();
-  StringRef Name = *ParseVarResult;
+  StringRef Name = ParseVarResult->Name;
 
-  if (IsPseudo)
+  if (ParseVarResult->IsPseudo)
     return FileCheckErrorDiagnostic::get(
         SM, Name, "definition of pseudo numeric variable unsupported");
 
@@ -143,15 +157,9 @@ FileCheckPattern::parseNumericVariableDefinition(
   return DefinedNumericVariable;
 }
 
-Expected<FileCheckNumericVariable *>
-FileCheckPattern::parseNumericVariableUse(StringRef &Expr,
+Expected<std::unique_ptr<FileCheckNumericVariableUse>>
+FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
                                           const SourceMgr &SM) const {
-  bool IsPseudo;
-  Expected<StringRef> ParseVarResult = parseVariable(Expr, IsPseudo, SM);
-  if (!ParseVarResult)
-    return ParseVarResult.takeError();
-  StringRef Name = *ParseVarResult;
-
   if (IsPseudo && !Name.equals("@LINE"))
     return FileCheckErrorDiagnostic::get(
         SM, Name, "invalid pseudo numeric variable '" + Name + "'");
@@ -178,7 +186,32 @@ FileCheckPattern::parseNumericVariableUse(StringRef &Expr,
         SM, Name,
         "numeric variable '" + Name + "' defined on the same line as used");
 
-  return NumericVariable;
+  return llvm::make_unique<FileCheckNumericVariableUse>(Name, NumericVariable);
+}
+
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO,
+                                      const SourceMgr &SM) const {
+  if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
+    // Try to parse as a numeric variable use.
+    Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+        parseVariable(Expr, SM);
+    if (ParseVarResult)
+      return parseNumericVariableUse(ParseVarResult->Name,
+                                     ParseVarResult->IsPseudo, SM);
+    if (AO == AllowedOperand::LineVar)
+      return ParseVarResult.takeError();
+    // Ignore the error and retry parsing as a literal.
+    consumeError(ParseVarResult.takeError());
+  }
+
+  // Otherwise, parse it as a literal.
+  uint64_t LiteralValue;
+  if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue))
+    return llvm::make_unique<FileCheckExpressionLiteral>(LiteralValue);
+
+  return FileCheckErrorDiagnostic::get(SM, Expr,
+                                       "invalid operand format '" + Expr + "'");
 }
 
 static uint64_t add(uint64_t LeftOp, uint64_t RightOp) {
@@ -189,20 +222,16 @@ static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) {
   return LeftOp - RightOp;
 }
 
-Expected<FileCheckExpression *>
-FileCheckPattern::parseBinop(StringRef &Expr, const SourceMgr &SM) const {
-  Expected<FileCheckNumericVariable *> LeftParseResult =
-      parseNumericVariableUse(Expr, SM);
-  if (!LeftParseResult) {
-    return LeftParseResult.takeError();
-  }
-  FileCheckNumericVariable *LeftOp = *LeftParseResult;
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseBinop(StringRef &Expr,
+                             std::unique_ptr<FileCheckExpressionAST> LeftOp,
+                             bool IsLegacyLineExpr, const SourceMgr &SM) const {
+  Expr = Expr.ltrim(SpaceChars);
+  if (Expr.empty())
+    return std::move(LeftOp);
 
   // Check if this is a supported operation and select a function to perform
   // it.
-  Expr = Expr.ltrim(SpaceChars);
-  if (Expr.empty())
-    return Context->makeExpression(add, LeftOp, 0);
   SMLoc OpLoc = SMLoc::getFromPointer(Expr.data());
   char Operator = popFront(Expr);
   binop_eval_t EvalBinop;
@@ -223,22 +252,24 @@ FileCheckPattern::parseBinop(StringRef &Expr, const SourceMgr &SM) const {
   if (Expr.empty())
     return FileCheckErrorDiagnostic::get(SM, Expr,
                                          "missing operand in expression");
-  uint64_t RightOp;
-  if (Expr.consumeInteger(10, RightOp))
-    return FileCheckErrorDiagnostic::get(
-        SM, Expr, "invalid offset in expression '" + Expr + "'");
-  Expr = Expr.ltrim(SpaceChars);
-  if (!Expr.empty())
-    return FileCheckErrorDiagnostic::get(
-        SM, Expr, "unexpected characters at end of expression '" + Expr + "'");
+  // The second operand in a legacy @LINE expression is always a literal.
+  AllowedOperand AO =
+      IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any;
+  Expected<std::unique_ptr<FileCheckExpressionAST>> RightOpResult =
+      parseNumericOperand(Expr, AO, SM);
+  if (!RightOpResult)
+    return RightOpResult;
 
-  return Context->makeExpression(EvalBinop, LeftOp, RightOp);
+  Expr = Expr.ltrim(SpaceChars);
+  return llvm::make_unique<FileCheckASTBinop>(EvalBinop, std::move(LeftOp),
+                                              std::move(*RightOpResult));
 }
 
-Expected<FileCheckExpression *> FileCheckPattern::parseNumericSubstitutionBlock(
+Expected<std::unique_ptr<FileCheckExpressionAST>>
+FileCheckPattern::parseNumericSubstitutionBlock(
     StringRef Expr,
     Optional<FileCheckNumericVariable *> &DefinedNumericVariable,
-    const SourceMgr &SM) const {
+    bool IsLegacyLineExpr, const SourceMgr &SM) const {
   // Parse the numeric variable definition.
   DefinedNumericVariable = None;
   size_t DefEnd = Expr.find(':');
@@ -259,12 +290,29 @@ Expected<FileCheckExpression *> FileCheckPattern::parseNumericSubstitutionBlock(
       return ParseResult.takeError();
     DefinedNumericVariable = *ParseResult;
 
-    return Context->makeExpression(add, nullptr, 0);
+    return nullptr;
   }
 
   // Parse the expression itself.
   Expr = Expr.ltrim(SpaceChars);
-  return parseBinop(Expr, SM);
+  // The first operand in a legacy @LINE expression is always the @LINE pseudo
+  // variable.
+  AllowedOperand AO =
+      IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
+  Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+      parseNumericOperand(Expr, AO, SM);
+  while (ParseResult && !Expr.empty()) {
+    ParseResult =
+        parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, SM);
+    // Legacy @LINE expressions only allow 2 operands.
+    if (ParseResult && IsLegacyLineExpr && !Expr.empty())
+      return FileCheckErrorDiagnostic::get(
+          SM, Expr,
+          "unexpected characters at end of expression '" + Expr + "'");
+  }
+  if (!ParseResult)
+    return ParseResult;
+  return std::move(*ParseResult);
 }
 
 bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
@@ -375,12 +423,15 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
       PatternStr = UnparsedPatternStr.substr(End + 2);
 
       bool IsDefinition = false;
+      // Whether the substitution block is a legacy use of @LINE with string
+      // substitution block syntax.
+      bool IsLegacyLineExpr = false;
       StringRef DefName;
       StringRef SubstStr;
       StringRef MatchRegexp;
       size_t SubstInsertIdx = RegExStr.size();
 
-      // Parse string variable or legacy expression.
+      // Parse string variable or legacy @LINE expression.
       if (!IsNumBlock) {
         size_t VarEndIdx = MatchStr.find(":");
         size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
@@ -391,15 +442,15 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
         }
 
         // Get the name (e.g. "foo") and verify it is well formed.
-        bool IsPseudo;
         StringRef OrigMatchStr = MatchStr;
-        Expected<StringRef> ParseVarResult =
-            parseVariable(MatchStr, IsPseudo, SM);
+        Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+            parseVariable(MatchStr, SM);
         if (!ParseVarResult) {
           logAllUnhandledErrors(ParseVarResult.takeError(), errs());
           return true;
         }
-        StringRef Name = *ParseVarResult;
+        StringRef Name = ParseVarResult->Name;
+        bool IsPseudo = ParseVarResult->IsPseudo;
 
         IsDefinition = (VarEndIdx != StringRef::npos);
         if (IsDefinition) {
@@ -424,23 +475,24 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
         } else {
           if (IsPseudo) {
             MatchStr = OrigMatchStr;
-            IsNumBlock = true;
+            IsLegacyLineExpr = IsNumBlock = true;
           } else
             SubstStr = Name;
         }
       }
 
       // Parse numeric substitution block.
-      FileCheckExpression *Expression;
+      std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
       Optional<FileCheckNumericVariable *> DefinedNumericVariable;
       if (IsNumBlock) {
-        Expected<FileCheckExpression *> ParseResult =
-            parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, SM);
+        Expected<std::unique_ptr<FileCheckExpressionAST>> ParseResult =
+            parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
+                                          IsLegacyLineExpr, SM);
         if (!ParseResult) {
           logAllUnhandledErrors(ParseResult.takeError(), errs());
           return true;
         }
-        Expression = *ParseResult;
+        ExpressionAST = std::move(*ParseResult);
         if (DefinedNumericVariable) {
           IsDefinition = true;
           DefName = (*DefinedNumericVariable)->getName();
@@ -468,8 +520,8 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix,
           // previous CHECK patterns, and substitution of expressions.
           FileCheckSubstitution *Substitution =
               IsNumBlock
-                  ? Context->makeNumericSubstitution(SubstStr, Expression,
-                                                     SubstInsertIdx)
+                  ? Context->makeNumericSubstitution(
+                        SubstStr, std::move(ExpressionAST), SubstInsertIdx)
                   : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
           Substitutions.push_back(Substitution);
         }
@@ -660,7 +712,7 @@ void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
       Expected<std::string> MatchedValue = Substitution->getResult();
 
       // Substitution failed or is not known at match time, print the undefined
-      // variable it uses.
+      // variables it uses.
       if (!MatchedValue) {
         bool UndefSeen = false;
         handleAllErrors(MatchedValue.takeError(),
@@ -669,13 +721,11 @@ void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
                         [](const FileCheckErrorDiagnostic &E) {},
                         [&](const FileCheckUndefVarError &E) {
                           if (!UndefSeen) {
-                            OS << "uses undefined variable ";
+                            OS << "uses undefined variable(s):";
                             UndefSeen = true;
                           }
+                          OS << " ";
                           E.log(OS);
-                        },
-                        [](const ErrorInfoBase &E) {
-                          llvm_unreachable("Unexpected error");
                         });
       } else {
         // Substitution succeeded. Print substituted value.
@@ -768,15 +818,6 @@ FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
   return VarIter->second;
 }
 
-FileCheckExpression *
-FileCheckPatternContext::makeExpression(binop_eval_t EvalBinop,
-                                        FileCheckNumericVariable *OperandLeft,
-                                        uint64_t OperandRight) {
-  Expressions.push_back(llvm::make_unique<FileCheckExpression>(
-      EvalBinop, OperandLeft, OperandRight));
-  return Expressions.back().get();
-}
-
 template <class... Types>
 FileCheckNumericVariable *
 FileCheckPatternContext::makeNumericVariable(Types... args) {
@@ -794,10 +835,10 @@ FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
 }
 
 FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution(
-    StringRef ExpressionStr, FileCheckExpression *Expression,
-    size_t InsertIdx) {
+    StringRef ExpressionStr,
+    std::unique_ptr<FileCheckExpressionAST> ExpressionAST, size_t InsertIdx) {
   Substitutions.push_back(llvm::make_unique<FileCheckNumericSubstitution>(
-      this, ExpressionStr, Expression, InsertIdx));
+      this, ExpressionStr, std::move(ExpressionAST), InsertIdx));
   return Substitutions.back().get();
 }
 
@@ -1777,9 +1818,8 @@ Error FileCheckPatternContext::defineCmdlineVariables(
       std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
       StringRef CmdlineName = CmdlineNameVal.first;
       StringRef OrigCmdlineName = CmdlineName;
-      bool IsPseudo;
-      Expected<StringRef> ParseVarResult =
-          FileCheckPattern::parseVariable(CmdlineName, IsPseudo, SM);
+      Expected<FileCheckPattern::VariableProperties> ParseVarResult =
+          FileCheckPattern::parseVariable(CmdlineName, SM);
       if (!ParseVarResult) {
         Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
         continue;
@@ -1787,7 +1827,7 @@ Error FileCheckPatternContext::defineCmdlineVariables(
       // Check that CmdlineName does not denote a pseudo variable is only
       // composed of the parsed numeric variable. This catches cases like
       // "FOO+2" in a "FOO+2=10" definition.
-      if (IsPseudo || !CmdlineName.empty()) {
+      if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
         Errs = joinErrors(std::move(Errs),
                           FileCheckErrorDiagnostic::get(
                               SM, OrigCmdlineName,
@@ -1795,7 +1835,7 @@ Error FileCheckPatternContext::defineCmdlineVariables(
                                   OrigCmdlineName + "'"));
         continue;
       }
-      StringRef Name = *ParseVarResult;
+      StringRef Name = ParseVarResult->Name;
 
       // Detect collisions between string and numeric variables when the former
       // is created later than the latter.
diff --git a/llvm/test/FileCheck/line-count.txt b/llvm/test/FileCheck/line-count.txt
index 7b34e00bef404..0c7be7ebc99b9 100644
--- a/llvm/test/FileCheck/line-count.txt
+++ b/llvm/test/FileCheck/line-count.txt
@@ -50,7 +50,7 @@
 50 ERR9: line-count.txt:[[#@LINE-1]]:17: error: unsupported operation '*'
 51
 52 BAD10: [[@LINE-x]]
-53 ERR10: line-count.txt:[[#@LINE-1]]:19: error: invalid offset in expression 'x'
+53 ERR10: line-count.txt:[[#@LINE-1]]:19: error: invalid operand format 'x'
 54
 55 BAD11: [[@LINE-1x]]
 56 ERR11: line-count.txt:[[#@LINE-1]]:20: error: unexpected characters at end of expression 'x'
diff --git a/llvm/test/FileCheck/numeric-expression.txt b/llvm/test/FileCheck/numeric-expression.txt
index 5e10d31dc2a0e..3ff7519e51193 100644
--- a/llvm/test/FileCheck/numeric-expression.txt
+++ b/llvm/test/FileCheck/numeric-expression.txt
@@ -59,8 +59,8 @@ CHECK-NEXT: [[# VAR1 -1]]
 CHECK-NEXT: [[# VAR1 - 1]]
 CHECK-NEXT: [[# VAR1 - 1 ]]
 
-; Numeric expressions using variables defined on the command-line and an
-; immediate interpreted as an unsigned value.
+; Numeric expressions using variables defined on other lines and an immediate
+; interpreted as an unsigned value.
 ; Note: 9223372036854775819 = 0x8000000000000000 + 11
 ;       9223372036854775808 = 0x8000000000000000
 USE UNSIGNED IMM
@@ -68,21 +68,29 @@ USE UNSIGNED IMM
 CHECK-LABEL: USE UNSIGNED IMM
 CHECK-NEXT: [[#VAR1+9223372036854775808]]
 
-; Numeric expression using undefined variable.
+; Numeric expressions using more than one variable defined on other lines.
+USE MULTI VAR
+31
+42
+CHECK-LABEL: USE MULTI VAR
+CHECK-NEXT: [[#VAR2:]]
+CHECK-NEXT: [[#VAR1+VAR2]]
+
+; Numeric expression using undefined variables.
 RUN: not FileCheck --check-prefix UNDEF-USE --input-file %s %s 2>&1 \
 RUN:   | FileCheck --strict-whitespace --check-prefix UNDEF-USE-MSG %s
 
 UNDEF VAR USE
 UNDEFVAR: 11
 UNDEF-USE-LABEL: UNDEF VAR USE
-UNDEF-USE-NEXT: UNDEFVAR: [[#UNDEFVAR]]
+UNDEF-USE-NEXT: UNDEFVAR: [[#UNDEFVAR1+UNDEFVAR2]]
 UNDEF-USE-MSG: numeric-expression.txt:[[#@LINE-1]]:17: error: {{U}}NDEF-USE-NEXT: expected string not found in input
-UNDEF-USE-MSG-NEXT: {{U}}NDEF-USE-NEXT: UNDEFVAR: {{\[\[#UNDEFVAR\]\]}}
+UNDEF-USE-MSG-NEXT: {{U}}NDEF-USE-NEXT: UNDEFVAR: {{\[\[#UNDEFVAR1\+UNDEFVAR2\]\]}}
 UNDEF-USE-MSG-NEXT: {{^                \^$}}
 UNDEF-USE-MSG-NEXT: numeric-expression.txt:[[#@LINE-6]]:1: note: scanning from here
 UNDEF-USE-MSG-NEXT: UNDEFVAR: 11
 UNDEF-USE-MSG-NEXT: {{^\^$}}
-UNDEF-USE-MSG-NEXT: numeric-expression.txt:[[#@LINE-9]]:1: note: uses undefined variable "UNDEFVAR"
+UNDEF-USE-MSG-NEXT: numeric-expression.txt:[[#@LINE-9]]:1: note: uses undefined variable(s): "UNDEFVAR1" "UNDEFVAR2"
 UNDEF-USE-MSG-NEXT: UNDEFVAR: 11
 UNDEF-USE-MSG-NEXT: {{^\^$}}
 
diff --git a/llvm/test/FileCheck/var-scope.txt b/llvm/test/FileCheck/var-scope.txt
index c45a384812659..3fa8a73e157d8 100644
--- a/llvm/test/FileCheck/var-scope.txt
+++ b/llvm/test/FileCheck/var-scope.txt
@@ -34,5 +34,5 @@ LOCAL3: [[LOCAL]][[#LOCNUM+2]]
 GLOBAL: [[$GLOBAL]][[#$GLOBNUM+2]]
 
 ERRUNDEF: expected string not found in input
-ERRUNDEFLOCAL: uses undefined variable "LOCAL"
-ERRUNDEFLOCNUM: uses undefined variable "LOCNUM"
+ERRUNDEFLOCAL: uses undefined variable(s): "LOCAL"
+ERRUNDEFLOCNUM: uses undefined variable(s): "LOCNUM"
diff --git a/llvm/unittests/Support/FileCheckTest.cpp b/llvm/unittests/Support/FileCheckTest.cpp
index 0cc729da46acb..8df4603dbd118 100644
--- a/llvm/unittests/Support/FileCheckTest.cpp
+++ b/llvm/unittests/Support/FileCheckTest.cpp
@@ -8,56 +8,112 @@
 
 #include "llvm/Support/FileCheck.h"
 #include "gtest/gtest.h"
+#include <unordered_set>
 
 using namespace llvm;
 namespace {
 
 class FileCheckTest : public ::testing::Test {};
 
+TEST_F(FileCheckTest, Literal) {
+  // Eval returns the literal's value.
+  FileCheckExpressionLiteral Ten(10);
+  Expected<uint64_t> Value = Ten.eval();
+  EXPECT_TRUE(bool(Value));
+  EXPECT_EQ(10U, *Value);
+
+  // Max value can be correctly represented.
+  FileCheckExpressionLiteral Max(std::numeric_limits<uint64_t>::max());
+  Value = Max.eval();
+  EXPECT_TRUE(bool(Value));
+  EXPECT_EQ(std::numeric_limits<uint64_t>::max(), *Value);
+}
+
+static std::string toString(const std::unordered_set<std::string> &Set) {
+  bool First = true;
+  std::string Str;
+  for (StringRef S : Set) {
+    Str += Twine(First ? "{" + S : ", " + S).str();
+    First = false;
+  }
+  Str += '}';
+  return Str;
+}
+
+static void
+expectUndefErrors(std::unordered_set<std::string> ExpectedUndefVarNames,
+                  Error Err) {
+  handleAllErrors(std::move(Err), [&](const FileCheckUndefVarError &E) {
+    ExpectedUndefVarNames.erase(E.getVarName());
+  });
+  EXPECT_TRUE(ExpectedUndefVarNames.empty()) << toString(ExpectedUndefVarNames);
+}
+
+static void expectUndefError(const Twine &ExpectedUndefVarName, Error Err) {
+  expectUndefErrors({ExpectedUndefVarName.str()}, std::move(Err));
+}
+
 TEST_F(FileCheckTest, NumericVariable) {
-  // Undefined variable: getValue fails, setValue does not trigger assert.
+  // Undefined variable: getValue and eval fail, error returned by eval holds
+  // the name of the undefined variable and setValue does not trigger assert.
   FileCheckNumericVariable FooVar = FileCheckNumericVariable(1, "FOO");
   EXPECT_EQ("FOO", FooVar.getName());
-  llvm::Optional<uint64_t> Value = FooVar.getValue();
-  EXPECT_FALSE(Value);
-  FooVar.clearValue();
+  FileCheckNumericVariableUse FooVarUse =
+      FileCheckNumericVariableUse("FOO", &FooVar);
+  EXPECT_FALSE(FooVar.getValue());
+  Expected<uint64_t> EvalResult = FooVarUse.eval();
+  EXPECT_FALSE(EvalResult);
+  expectUndefError("FOO", EvalResult.takeError());
   FooVar.setValue(42);
 
-  // Defined variable: getValue returns value set.
-  Value = FooVar.getValue();
-  EXPECT_TRUE(Value);
+  // Defined variable: getValue and eval return value set.
+  Optional<uint64_t> Value = FooVar.getValue();
+  EXPECT_TRUE(bool(Value));
   EXPECT_EQ(42U, *Value);
+  EvalResult = FooVarUse.eval();
+  EXPECT_TRUE(bool(EvalResult));
+  EXPECT_EQ(42U, *EvalResult);
 
-  // Clearing variable: getValue fails.
+  // Clearing variable: getValue and eval fail. Error returned by eval holds
+  // the name of the cleared variable.
   FooVar.clearValue();
   Value = FooVar.getValue();
   EXPECT_FALSE(Value);
+  EvalResult = FooVarUse.eval();
+  EXPECT_FALSE(EvalResult);
+  expectUndefError("FOO", EvalResult.takeError());
 }
 
 uint64_t doAdd(uint64_t OpL, uint64_t OpR) { return OpL + OpR; }
 
-static void expectUndefError(const Twine &ExpectedStr, Error Err) {
-  handleAllErrors(std::move(Err), [&](const FileCheckUndefVarError &E) {
-    EXPECT_EQ(ExpectedStr.str(), E.getVarName());
-  });
-}
-
-TEST_F(FileCheckTest, Expression) {
+TEST_F(FileCheckTest, Binop) {
   FileCheckNumericVariable FooVar = FileCheckNumericVariable("FOO", 42);
-  FileCheckExpression Expression = FileCheckExpression(doAdd, &FooVar, 18);
+  std::unique_ptr<FileCheckNumericVariableUse> FooVarUse =
+      llvm::make_unique<FileCheckNumericVariableUse>("FOO", &FooVar);
+  FileCheckNumericVariable BarVar = FileCheckNumericVariable("BAR", 18);
+  std::unique_ptr<FileCheckNumericVariableUse> BarVarUse =
+      llvm::make_unique<FileCheckNumericVariableUse>("BAR", &BarVar);
+  FileCheckASTBinop Binop =
+      FileCheckASTBinop(doAdd, std::move(FooVarUse), std::move(BarVarUse));
 
   // Defined variable: eval returns right value.
-  Expected<uint64_t> Value = Expression.eval();
+  Expected<uint64_t> Value = Binop.eval();
   EXPECT_TRUE(bool(Value));
   EXPECT_EQ(60U, *Value);
 
-  // Undefined variable: eval fails, undefined variable returned. We call
-  // getUndefVarName first to check that it can be called without calling
-  // eval() first.
+  // 1 undefined variable: eval fails, error contains name of undefined
+  // variable.
   FooVar.clearValue();
-  Error EvalError = Expression.eval().takeError();
-  EXPECT_TRUE(errorToBool(std::move(EvalError)));
-  expectUndefError("FOO", std::move(EvalError));
+  Value = Binop.eval();
+  EXPECT_FALSE(Value);
+  expectUndefError("FOO", Value.takeError());
+
+  // 2 undefined variables: eval fails, error contains names of all undefined
+  // variables.
+  BarVar.clearValue();
+  Value = Binop.eval();
+  EXPECT_FALSE(Value);
+  expectUndefErrors({"FOO", "BAR"}, Value.takeError());
 }
 
 TEST_F(FileCheckTest, ValidVarNameStart) {
@@ -84,77 +140,69 @@ TEST_F(FileCheckTest, ParseVar) {
   SourceMgr SM;
   StringRef OrigVarName = bufferize(SM, "GoodVar42");
   StringRef VarName = OrigVarName;
-  bool IsPseudo = true;
-  Expected<StringRef> ParsedName =
-      FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(bool(ParsedName));
-  EXPECT_EQ(*ParsedName, OrigVarName);
+  Expected<FileCheckPattern::VariableProperties> ParsedVarResult =
+      FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(bool(ParsedVarResult));
+  EXPECT_EQ(ParsedVarResult->Name, OrigVarName);
   EXPECT_TRUE(VarName.empty());
-  EXPECT_FALSE(IsPseudo);
+  EXPECT_FALSE(ParsedVarResult->IsPseudo);
 
   VarName = OrigVarName = bufferize(SM, "$GoodGlobalVar");
-  IsPseudo = true;
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(bool(ParsedName));
-  EXPECT_EQ(*ParsedName, OrigVarName);
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(bool(ParsedVarResult));
+  EXPECT_EQ(ParsedVarResult->Name, OrigVarName);
   EXPECT_TRUE(VarName.empty());
-  EXPECT_FALSE(IsPseudo);
+  EXPECT_FALSE(ParsedVarResult->IsPseudo);
 
   VarName = OrigVarName = bufferize(SM, "@GoodPseudoVar");
-  IsPseudo = true;
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(bool(ParsedName));
-  EXPECT_EQ(*ParsedName, OrigVarName);
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(bool(ParsedVarResult));
+  EXPECT_EQ(ParsedVarResult->Name, OrigVarName);
   EXPECT_TRUE(VarName.empty());
-  EXPECT_TRUE(IsPseudo);
+  EXPECT_TRUE(ParsedVarResult->IsPseudo);
 
   VarName = bufferize(SM, "42BadVar");
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(errorToBool(ParsedName.takeError()));
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(errorToBool(ParsedVarResult.takeError()));
 
   VarName = bufferize(SM, "$@");
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(errorToBool(ParsedName.takeError()));
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(errorToBool(ParsedVarResult.takeError()));
 
   VarName = OrigVarName = bufferize(SM, "B@dVar");
-  IsPseudo = true;
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(bool(ParsedName));
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(bool(ParsedVarResult));
   EXPECT_EQ(VarName, OrigVarName.substr(1));
-  EXPECT_EQ(*ParsedName, "B");
-  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(ParsedVarResult->Name, "B");
+  EXPECT_FALSE(ParsedVarResult->IsPseudo);
 
   VarName = OrigVarName = bufferize(SM, "B$dVar");
-  IsPseudo = true;
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(bool(ParsedName));
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(bool(ParsedVarResult));
   EXPECT_EQ(VarName, OrigVarName.substr(1));
-  EXPECT_EQ(*ParsedName, "B");
-  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(ParsedVarResult->Name, "B");
+  EXPECT_FALSE(ParsedVarResult->IsPseudo);
 
   VarName = bufferize(SM, "BadVar+");
-  IsPseudo = true;
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(bool(ParsedName));
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(bool(ParsedVarResult));
   EXPECT_EQ(VarName, "+");
-  EXPECT_EQ(*ParsedName, "BadVar");
-  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(ParsedVarResult->Name, "BadVar");
+  EXPECT_FALSE(ParsedVarResult->IsPseudo);
 
   VarName = bufferize(SM, "BadVar-");
-  IsPseudo = true;
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(bool(ParsedName));
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(bool(ParsedVarResult));
   EXPECT_EQ(VarName, "-");
-  EXPECT_EQ(*ParsedName, "BadVar");
-  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(ParsedVarResult->Name, "BadVar");
+  EXPECT_FALSE(ParsedVarResult->IsPseudo);
 
   VarName = bufferize(SM, "BadVar:");
-  IsPseudo = true;
-  ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM);
-  EXPECT_TRUE(bool(ParsedName));
+  ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM);
+  EXPECT_TRUE(bool(ParsedVarResult));
   EXPECT_EQ(VarName, ":");
-  EXPECT_EQ(*ParsedName, "BadVar");
-  EXPECT_FALSE(IsPseudo);
+  EXPECT_EQ(ParsedVarResult->Name, "BadVar");
+  EXPECT_FALSE(ParsedVarResult->IsPseudo);
 }
 
 class PatternTester {
@@ -197,7 +245,7 @@ class PatternTester {
     StringRef ExprBufferRef = bufferize(SM, Expr);
     Optional<FileCheckNumericVariable *> DefinedNumericVariable;
     return errorToBool(P.parseNumericSubstitutionBlock(
-                            ExprBufferRef, DefinedNumericVariable, SM)
+                            ExprBufferRef, DefinedNumericVariable, false, SM)
                            .takeError());
   }
 
@@ -269,15 +317,12 @@ TEST_F(FileCheckTest, ParseExpr) {
   // Missing offset operand.
   EXPECT_TRUE(Tester.parseSubstExpect("@LINE+"));
 
-  // Cannot parse offset operand.
-  EXPECT_TRUE(Tester.parseSubstExpect("@LINE+x"));
-
-  // Unexpected string at end of numeric expression.
-  EXPECT_TRUE(Tester.parseSubstExpect("@LINE+5x"));
-
   // Valid expression.
   EXPECT_FALSE(Tester.parseSubstExpect("@LINE+5"));
   EXPECT_FALSE(Tester.parseSubstExpect("FOO+4"));
+  Tester.initNextPattern();
+  EXPECT_FALSE(Tester.parsePatternExpect("[[#FOO+FOO]]"));
+  EXPECT_FALSE(Tester.parsePatternExpect("[[#FOO+3-FOO]]"));
 }
 
 TEST_F(FileCheckTest, ParsePattern) {
@@ -306,7 +351,6 @@ TEST_F(FileCheckTest, ParsePattern) {
   EXPECT_TRUE(Tester.parsePatternExpect("[[#42INVALID]]"));
   EXPECT_TRUE(Tester.parsePatternExpect("[[#@FOO]]"));
   EXPECT_TRUE(Tester.parsePatternExpect("[[#@LINE/2]]"));
-  EXPECT_TRUE(Tester.parsePatternExpect("[[#2+@LINE]]"));
   EXPECT_TRUE(Tester.parsePatternExpect("[[#YUP:@LINE]]"));
 
   // Valid numeric expressions and numeric variable definition.
@@ -365,35 +409,37 @@ TEST_F(FileCheckTest, Substitution) {
   // the right value.
   FileCheckNumericVariable LineVar = FileCheckNumericVariable("@LINE", 42);
   FileCheckNumericVariable NVar = FileCheckNumericVariable("N", 10);
-  FileCheckExpression LineExpression = FileCheckExpression(doAdd, &LineVar, 0);
-  FileCheckExpression NExpression = FileCheckExpression(doAdd, &NVar, 3);
-  FileCheckNumericSubstitution SubstitutionLine =
-      FileCheckNumericSubstitution(&Context, "@LINE", &LineExpression, 12);
+  auto LineVarUse =
+      llvm::make_unique<FileCheckNumericVariableUse>("@LINE", &LineVar);
+  auto NVarUse = llvm::make_unique<FileCheckNumericVariableUse>("N", &NVar);
+  FileCheckNumericSubstitution SubstitutionLine = FileCheckNumericSubstitution(
+      &Context, "@LINE", std::move(LineVarUse), 12);
   FileCheckNumericSubstitution SubstitutionN =
-      FileCheckNumericSubstitution(&Context, "N", &NExpression, 30);
-  Expected<std::string> Value = SubstitutionLine.getResult();
-  EXPECT_TRUE(bool(Value));
-  EXPECT_EQ("42", *Value);
-  Value = SubstitutionN.getResult();
-  EXPECT_TRUE(bool(Value));
-  EXPECT_EQ("13", *Value);
-
-  // Substitution of an undefined numeric variable fails.
+      FileCheckNumericSubstitution(&Context, "N", std::move(NVarUse), 30);
+  SubstValue = SubstitutionLine.getResult();
+  EXPECT_TRUE(bool(SubstValue));
+  EXPECT_EQ("42", *SubstValue);
+  SubstValue = SubstitutionN.getResult();
+  EXPECT_TRUE(bool(SubstValue));
+  EXPECT_EQ("10", *SubstValue);
+
+  // Substitution of an undefined numeric variable fails, error holds name of
+  // undefined variable.
   LineVar.clearValue();
-  SubstValue = SubstitutionLine.getResult().takeError();
+  SubstValue = SubstitutionLine.getResult();
   EXPECT_FALSE(bool(SubstValue));
   expectUndefError("@LINE", SubstValue.takeError());
   NVar.clearValue();
-  SubstValue = SubstitutionN.getResult().takeError();
+  SubstValue = SubstitutionN.getResult();
   EXPECT_FALSE(bool(SubstValue));
   expectUndefError("N", SubstValue.takeError());
 
   // Substitution of a defined string variable returns the right value.
   FileCheckPattern P = FileCheckPattern(Check::CheckPlain, &Context, 1);
   StringSubstitution = FileCheckStringSubstitution(&Context, "FOO", 42);
-  Value = StringSubstitution.getResult();
-  EXPECT_TRUE(bool(Value));
-  EXPECT_EQ("BAR", *Value);
+  SubstValue = StringSubstitution.getResult();
+  EXPECT_TRUE(bool(SubstValue));
+  EXPECT_EQ("BAR", *SubstValue);
 }
 
 TEST_F(FileCheckTest, FileCheckContext) {
@@ -456,14 +502,15 @@ TEST_F(FileCheckTest, FileCheckContext) {
   Expected<StringRef> LocalVar = Cxt.getPatternVarValue(LocalVarStr);
   FileCheckPattern P = FileCheckPattern(Check::CheckPlain, &Cxt, 1);
   Optional<FileCheckNumericVariable *> DefinedNumericVariable;
-  Expected<FileCheckExpression *> Expression = P.parseNumericSubstitutionBlock(
-      LocalNumVarRef, DefinedNumericVariable, SM);
-  Expected<StringRef> EmptyVar = Cxt.getPatternVarValue(EmptyVarStr);
-  Expected<StringRef> UnknownVar = Cxt.getPatternVarValue(UnknownVarStr);
+  Expected<std::unique_ptr<FileCheckExpressionAST>> ExpressionAST =
+      P.parseNumericSubstitutionBlock(LocalNumVarRef, DefinedNumericVariable,
+                                      /*IsLegacyLineExpr=*/false, SM);
   EXPECT_TRUE(bool(LocalVar));
   EXPECT_EQ(*LocalVar, "FOO");
-  EXPECT_TRUE(bool(Expression));
-  Expected<uint64_t> ExpressionVal = (*Expression)->eval();
+  Expected<StringRef> EmptyVar = Cxt.getPatternVarValue(EmptyVarStr);
+  Expected<StringRef> UnknownVar = Cxt.getPatternVarValue(UnknownVarStr);
+  EXPECT_TRUE(bool(ExpressionAST));
+  Expected<uint64_t> ExpressionVal = (*ExpressionAST)->eval();
   EXPECT_TRUE(bool(ExpressionVal));
   EXPECT_EQ(*ExpressionVal, 18U);
   EXPECT_TRUE(bool(EmptyVar));
@@ -478,12 +525,12 @@ TEST_F(FileCheckTest, FileCheckContext) {
   // local variables, if it was created before. This is important because local
   // variable clearing due to --enable-var-scope happens after numeric
   // expressions are linked to the numeric variables they use.
-  EXPECT_TRUE(errorToBool((*Expression)->eval().takeError()));
+  EXPECT_TRUE(errorToBool((*ExpressionAST)->eval().takeError()));
   P = FileCheckPattern(Check::CheckPlain, &Cxt, 2);
-  Expression = P.parseNumericSubstitutionBlock(LocalNumVarRef,
-                                               DefinedNumericVariable, SM);
-  EXPECT_TRUE(bool(Expression));
-  ExpressionVal = (*Expression)->eval();
+  ExpressionAST = P.parseNumericSubstitutionBlock(
+      LocalNumVarRef, DefinedNumericVariable, /*IsLegacyLineExpr=*/false, SM);
+  EXPECT_TRUE(bool(ExpressionAST));
+  ExpressionVal = (*ExpressionAST)->eval();
   EXPECT_TRUE(errorToBool(ExpressionVal.takeError()));
   EmptyVar = Cxt.getPatternVarValue(EmptyVarStr);
   EXPECT_TRUE(errorToBool(EmptyVar.takeError()));
@@ -501,10 +548,10 @@ TEST_F(FileCheckTest, FileCheckContext) {
   EXPECT_TRUE(bool(GlobalVar));
   EXPECT_EQ(*GlobalVar, "BAR");
   P = FileCheckPattern(Check::CheckPlain, &Cxt, 3);
-  Expression = P.parseNumericSubstitutionBlock(GlobalNumVarRef,
-                                               DefinedNumericVariable, SM);
-  EXPECT_TRUE(bool(Expression));
-  ExpressionVal = (*Expression)->eval();
+  ExpressionAST = P.parseNumericSubstitutionBlock(
+      GlobalNumVarRef, DefinedNumericVariable, /*IsLegacyLineExpr=*/false, SM);
+  EXPECT_TRUE(bool(ExpressionAST));
+  ExpressionVal = (*ExpressionAST)->eval();
   EXPECT_TRUE(bool(ExpressionVal));
   EXPECT_EQ(*ExpressionVal, 36U);
 
@@ -512,10 +559,10 @@ TEST_F(FileCheckTest, FileCheckContext) {
   Cxt.clearLocalVars();
   EXPECT_FALSE(errorToBool(Cxt.getPatternVarValue(GlobalVarStr).takeError()));
   P = FileCheckPattern(Check::CheckPlain, &Cxt, 4);
-  Expression = P.parseNumericSubstitutionBlock(GlobalNumVarRef,
-                                               DefinedNumericVariable, SM);
-  EXPECT_TRUE(bool(Expression));
-  ExpressionVal = (*Expression)->eval();
+  ExpressionAST = P.parseNumericSubstitutionBlock(
+      GlobalNumVarRef, DefinedNumericVariable, /*IsLegacyLineExpr=*/false, SM);
+  EXPECT_TRUE(bool(ExpressionAST));
+  ExpressionVal = (*ExpressionAST)->eval();
   EXPECT_TRUE(bool(ExpressionVal));
   EXPECT_EQ(*ExpressionVal, 36U);
 }

From ec8af0db6c97362a10994ab17ba9d175ac833f6c Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sat, 13 Jul 2019 14:29:02 +0000
Subject: [PATCH 039/451] [ARM] MVE minnm and maxnm instructions

This adds the patterns for minnm and maxnm from the fminnum and fmaxnum nodes,
similar to scalar types.

Original patch by Simon Tatham

Differential Revision: https://reviews.llvm.org/D63870

llvm-svn: 366002
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 13 ++--
 llvm/lib/Target/ARM/ARMInstrMVE.td      | 15 ++++
 llvm/test/CodeGen/Thumb2/mve-minmax.ll  | 95 +++++++++++++++++++++++++
 3 files changed, 119 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-minmax.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index dd11ed6ede75d..b7c894c2a8abb 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -281,6 +281,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
 
     if (HasMVEFP) {
+      setOperationAction(ISD::FMINNUM, VT, Legal);
+      setOperationAction(ISD::FMAXNUM, VT, Legal);
+
       // No native support for these.
       setOperationAction(ISD::FDIV, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
@@ -1254,10 +1257,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FRINT, MVT::f32, Legal);
     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+    if (Subtarget->hasNEON()) {
+      setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
+      setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
+      setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+      setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+    }
 
     if (Subtarget->hasFP64()) {
       setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 07ba61798d4d1..7ae91423fecbe 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -863,9 +863,24 @@ class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
 def MVE_VMAXNMf32 : MVE_VMINMAXNM<"vmaxnm", "f32", 0b0, 0b0>;
 def MVE_VMAXNMf16 : MVE_VMINMAXNM<"vmaxnm", "f16", 0b1, 0b0>;
 
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (fmaxnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
 def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
 def MVE_VMINNMf16 : MVE_VMINMAXNM<"vminnm", "f16", 0b1, 0b1>;
 
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (fminnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
+
 class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
               bit bit_4, list<dag> pattern=[]>
   : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
new file mode 100644
index 0000000000000..38648afaabfa0
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
+
+define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
+; CHECK-MVE-LABEL: maxnm_float32_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vmaxnm.f32 s11, s7, s3
+; CHECK-MVE-NEXT:    vmaxnm.f32 s10, s6, s2
+; CHECK-MVE-NEXT:    vmaxnm.f32 s9, s5, s1
+; CHECK-MVE-NEXT:    vmaxnm.f32 s8, s4, s0
+; CHECK-MVE-NEXT:    vmov q0, q2
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: maxnm_float32_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vmaxnm.f32 q0, q1, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %cmp = fcmp fast ogt <4 x float> %src2, %src1
+  %0 = select <4 x i1> %cmp, <4 x float> %src2, <4 x float> %src1
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half> %src2) {
+; CHECK-MVE-LABEL: minnm_float16_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
+; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q1[0]
+; CHECK-MVE-NEXT:    vmov s10, r0
+; CHECK-MVE-NEXT:    vmov.u16 r2, q1[1]
+; CHECK-MVE-NEXT:    vminnm.f16 s8, s10, s8
+; CHECK-MVE-NEXT:    vmov s10, r2
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov s8, r1
+; CHECK-MVE-NEXT:    vminnm.f16 s8, s10, s8
+; CHECK-MVE-NEXT:    vmov r1, s8
+; CHECK-MVE-NEXT:    vmov.16 q2[0], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
+; CHECK-MVE-NEXT:    vmov s12, r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q1[2]
+; CHECK-MVE-NEXT:    vmov s14, r0
+; CHECK-MVE-NEXT:    vminnm.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov.16 q2[2], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
+; CHECK-MVE-NEXT:    vmov s12, r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q1[3]
+; CHECK-MVE-NEXT:    vmov s14, r0
+; CHECK-MVE-NEXT:    vminnm.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov.16 q2[3], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
+; CHECK-MVE-NEXT:    vmov s12, r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q1[4]
+; CHECK-MVE-NEXT:    vmov s14, r0
+; CHECK-MVE-NEXT:    vminnm.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov.16 q2[4], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
+; CHECK-MVE-NEXT:    vmov s12, r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q1[5]
+; CHECK-MVE-NEXT:    vmov s14, r0
+; CHECK-MVE-NEXT:    vminnm.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov.16 q2[5], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
+; CHECK-MVE-NEXT:    vmov s12, r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q1[6]
+; CHECK-MVE-NEXT:    vmov s14, r0
+; CHECK-MVE-NEXT:    vminnm.f16 s12, s14, s12
+; CHECK-MVE-NEXT:    vmov r0, s12
+; CHECK-MVE-NEXT:    vmov.16 q2[6], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
+; CHECK-MVE-NEXT:    vmov s0, r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q1[7]
+; CHECK-MVE-NEXT:    vmov s2, r0
+; CHECK-MVE-NEXT:    vminnm.f16 s0, s2, s0
+; CHECK-MVE-NEXT:    vmov r0, s0
+; CHECK-MVE-NEXT:    vmov.16 q2[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q2
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: minnm_float16_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vminnm.f16 q0, q1, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %cmp = fcmp fast ogt <8 x half> %src2, %src1
+  %0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
+  ret <8 x half> %0
+}

From ac5bcbeb9f8c6ffd49f533d10241c23837336de7 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sat, 13 Jul 2019 14:38:53 +0000
Subject: [PATCH 040/451] [ARM] MVE VRINT support

This adds support for the floor/ceil/trunc/... series of instructions,
converting to various forms of VRINT. They use the same suffixes as their
floating point counterparts. There is no MVE VRINTR, so nearbyint is expanded.

Also added a copysign test, to show it is expanded.

Differential Revision: https://reviews.llvm.org/D63985

llvm-svn: 366003
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |   2 +
 llvm/lib/Target/ARM/ARMInstrMVE.td      |  23 ++
 llvm/test/CodeGen/Thumb2/mve-fmath.ll   | 177 ++++++++++
 llvm/test/CodeGen/Thumb2/mve-frint.ll   | 450 ++++++++++++++++++++++++
 4 files changed, 652 insertions(+)
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-frint.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index b7c894c2a8abb..2d8fadb724119 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -283,6 +283,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     if (HasMVEFP) {
       setOperationAction(ISD::FMINNUM, VT, Legal);
       setOperationAction(ISD::FMAXNUM, VT, Legal);
+      setOperationAction(ISD::FROUND, VT, Legal);
 
       // No native support for these.
       setOperationAction(ISD::FDIV, VT, Expand);
@@ -296,6 +297,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
       setOperationAction(ISD::FLOG10, VT, Expand);
       setOperationAction(ISD::FEXP, VT, Expand);
       setOperationAction(ISD::FEXP2, VT, Expand);
+      setOperationAction(ISD::FNEARBYINT, VT, Expand);
     }
   }
 
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 7ae91423fecbe..a6cc8cee65f1d 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2325,6 +2325,29 @@ multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
 defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
 defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
 
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
+            (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
+            (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
+            (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
+            (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
+            (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
+}
+
 class MVEFloatArithNeon<string iname, string suffix, bit size,
                            dag oops, dag iops, string ops,
                            vpred_ops vpred, string cstr, list<dag> pattern=[]>
diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
index 5ce4e73d5c4ec..41054e2d34d14 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
@@ -1165,6 +1165,181 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
+; CHECK-LABEL: copysign_float32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    .pad #32
+; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    vstr s5, [sp, #8]
+; CHECK-NEXT:    ldr.w r12, [sp, #8]
+; CHECK-NEXT:    vstr s6, [sp, #16]
+; CHECK-NEXT:    ldr.w lr, [sp, #16]
+; CHECK-NEXT:    vstr s7, [sp, #24]
+; CHECK-NEXT:    lsr.w r2, r12, #31
+; CHECK-NEXT:    ldr r6, [sp, #24]
+; CHECK-NEXT:    vstr s3, [sp, #28]
+; CHECK-NEXT:    ldr r3, [sp, #28]
+; CHECK-NEXT:    vstr s4, [sp]
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    vstr s0, [sp, #4]
+; CHECK-NEXT:    ldr r1, [sp, #4]
+; CHECK-NEXT:    vstr s1, [sp, #12]
+; CHECK-NEXT:    lsrs r0, r0, #31
+; CHECK-NEXT:    vstr s2, [sp, #20]
+; CHECK-NEXT:    bfi r1, r0, #31, #1
+; CHECK-NEXT:    ldr r4, [sp, #12]
+; CHECK-NEXT:    ldr r5, [sp, #20]
+; CHECK-NEXT:    bfi r4, r2, #31, #1
+; CHECK-NEXT:    lsr.w r2, lr, #31
+; CHECK-NEXT:    bfi r5, r2, #31, #1
+; CHECK-NEXT:    lsrs r2, r6, #31
+; CHECK-NEXT:    bfi r3, r2, #31, #1
+; CHECK-NEXT:    vmov s3, r3
+; CHECK-NEXT:    vmov s2, r5
+; CHECK-NEXT:    vmov s1, r4
+; CHECK-NEXT:    vmov s0, r1
+; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
+entry:
+  %0 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %src1, <4 x float> %src2)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @copysign_float16_t(<8 x half> %src1, <8 x half> %src2) {
+; CHECK-LABEL: copysign_float16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .pad #32
+; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    vmov.u16 r0, q1[1]
+; CHECK-NEXT:    vmov.u16 r1, q0[0]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[0]
+; CHECK-NEXT:    vstr.16 s8, [sp, #24]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[2]
+; CHECK-NEXT:    vstr.16 s8, [sp, #28]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[3]
+; CHECK-NEXT:    vstr.16 s8, [sp, #20]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[4]
+; CHECK-NEXT:    vstr.16 s8, [sp, #16]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[5]
+; CHECK-NEXT:    vstr.16 s8, [sp, #12]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[6]
+; CHECK-NEXT:    vstr.16 s8, [sp, #8]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vmov.u16 r0, q1[7]
+; CHECK-NEXT:    vmov s4, r0
+; CHECK-NEXT:    vstr.16 s8, [sp, #4]
+; CHECK-NEXT:    vstr.16 s4, [sp]
+; CHECK-NEXT:    vmov.u16 r0, q0[1]
+; CHECK-NEXT:    vmov s4, r0
+; CHECK-NEXT:    ldrb.w r0, [sp, #25]
+; CHECK-NEXT:    vabs.f16 s4, s4
+; CHECK-NEXT:    ands r0, r0, #128
+; CHECK-NEXT:    vneg.f16 s6, s4
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vseleq.f16 s4, s4, s6
+; CHECK-NEXT:    vmov r0, s4
+; CHECK-NEXT:    vmov s4, r1
+; CHECK-NEXT:    ldrb.w r1, [sp, #29]
+; CHECK-NEXT:    vabs.f16 s4, s4
+; CHECK-NEXT:    ands r1, r1, #128
+; CHECK-NEXT:    vneg.f16 s6, s4
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vseleq.f16 s4, s4, s6
+; CHECK-NEXT:    vmov r1, s4
+; CHECK-NEXT:    vmov.16 q1[0], r1
+; CHECK-NEXT:    vmov.16 q1[1], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    ldrb.w r0, [sp, #21]
+; CHECK-NEXT:    vabs.f16 s8, s8
+; CHECK-NEXT:    ands r0, r0, #128
+; CHECK-NEXT:    vneg.f16 s10, s8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vseleq.f16 s8, s8, s10
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[2], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[3]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    ldrb.w r0, [sp, #17]
+; CHECK-NEXT:    vabs.f16 s8, s8
+; CHECK-NEXT:    ands r0, r0, #128
+; CHECK-NEXT:    vneg.f16 s10, s8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vseleq.f16 s8, s8, s10
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[3], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[4]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    ldrb.w r0, [sp, #13]
+; CHECK-NEXT:    vabs.f16 s8, s8
+; CHECK-NEXT:    ands r0, r0, #128
+; CHECK-NEXT:    vneg.f16 s10, s8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vseleq.f16 s8, s8, s10
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[4], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[5]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    ldrb.w r0, [sp, #9]
+; CHECK-NEXT:    vabs.f16 s8, s8
+; CHECK-NEXT:    ands r0, r0, #128
+; CHECK-NEXT:    vneg.f16 s10, s8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vseleq.f16 s8, s8, s10
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[5], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[6]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    ldrb.w r0, [sp, #5]
+; CHECK-NEXT:    vabs.f16 s8, s8
+; CHECK-NEXT:    ands r0, r0, #128
+; CHECK-NEXT:    vneg.f16 s10, s8
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vseleq.f16 s8, s8, s10
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[6], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[7]
+; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    ldrb.w r0, [sp, #1]
+; CHECK-NEXT:    vabs.f16 s0, s0
+; CHECK-NEXT:    ands r0, r0, #128
+; CHECK-NEXT:    vneg.f16 s2, s0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vseleq.f16 s0, s0, s2
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    vmov.16 q1[7], r0
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call fast <8 x half> @llvm.copysign.v8f16(<8 x half> %src1, <8 x half> %src2)
+  ret <8 x half> %0
+}
+
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
 declare <4 x float> @llvm.cos.v4f32(<4 x float>)
 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
@@ -1174,6 +1349,7 @@ declare <4 x float> @llvm.log.v4f32(<4 x float>)
 declare <4 x float> @llvm.log2.v4f32(<4 x float>)
 declare <4 x float> @llvm.log10.v4f32(<4 x float>)
 declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
 declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
 declare <8 x half> @llvm.cos.v8f16(<8 x half>)
 declare <8 x half> @llvm.sin.v8f16(<8 x half>)
@@ -1183,4 +1359,5 @@ declare <8 x half> @llvm.log.v8f16(<8 x half>)
 declare <8 x half> @llvm.log2.v8f16(<8 x half>)
 declare <8 x half> @llvm.log10.v8f16(<8 x half>)
 declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
+declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
 
diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll
new file mode 100644
index 0000000000000..847d7ede1d73c
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll
@@ -0,0 +1,450 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
+
+define arm_aapcs_vfpcc <4 x float> @fceil_float32_t(<4 x float> %src) {
+; CHECK-MVE-LABEL: fceil_float32_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vrintp.f32 s7, s3
+; CHECK-MVE-NEXT:    vrintp.f32 s6, s2
+; CHECK-MVE-NEXT:    vrintp.f32 s5, s1
+; CHECK-MVE-NEXT:    vrintp.f32 s4, s0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: fceil_float32_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintp.f32 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> %src)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fceil_float16_t(<8 x half> %src) {
+; CHECK-MVE-LABEL: fceil_float16_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
+; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
+; CHECK-MVE-NEXT:    vmov s4, r0
+; CHECK-MVE-NEXT:    vrintp.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vmov s4, r1
+; CHECK-MVE-NEXT:    vrintp.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r1, s4
+; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintp.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
+; CHECK-MVE-NEXT:    vmov s0, r0
+; CHECK-MVE-NEXT:    vrintp.f16 s0, s0
+; CHECK-MVE-NEXT:    vmov r0, s0
+; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: fceil_float16_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintp.f16 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <8 x half> @llvm.ceil.v8f16(<8 x half> %src)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
+; CHECK-MVE-LABEL: ftrunc_float32_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vrintz.f32 s7, s3
+; CHECK-MVE-NEXT:    vrintz.f32 s6, s2
+; CHECK-MVE-NEXT:    vrintz.f32 s5, s1
+; CHECK-MVE-NEXT:    vrintz.f32 s4, s0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: ftrunc_float32_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintz.f32 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> %src)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @ftrunc_float16_t(<8 x half> %src) {
+; CHECK-MVE-LABEL: ftrunc_float16_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
+; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
+; CHECK-MVE-NEXT:    vmov s4, r0
+; CHECK-MVE-NEXT:    vrintz.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vmov s4, r1
+; CHECK-MVE-NEXT:    vrintz.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r1, s4
+; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintz.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
+; CHECK-MVE-NEXT:    vmov s0, r0
+; CHECK-MVE-NEXT:    vrintz.f16 s0, s0
+; CHECK-MVE-NEXT:    vmov r0, s0
+; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: ftrunc_float16_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintz.f16 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <8 x half> @llvm.trunc.v8f16(<8 x half> %src)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
+; CHECK-MVE-LABEL: frint_float32_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vrintx.f32 s7, s3
+; CHECK-MVE-NEXT:    vrintx.f32 s6, s2
+; CHECK-MVE-NEXT:    vrintx.f32 s5, s1
+; CHECK-MVE-NEXT:    vrintx.f32 s4, s0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: frint_float32_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintx.f32 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <4 x float> @llvm.rint.v4f32(<4 x float> %src)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @frint_float16_t(<8 x half> %src) {
+; CHECK-MVE-LABEL: frint_float16_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
+; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
+; CHECK-MVE-NEXT:    vmov s4, r0
+; CHECK-MVE-NEXT:    vrintx.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vmov s4, r1
+; CHECK-MVE-NEXT:    vrintx.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r1, s4
+; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintx.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
+; CHECK-MVE-NEXT:    vmov s0, r0
+; CHECK-MVE-NEXT:    vrintx.f16 s0, s0
+; CHECK-MVE-NEXT:    vmov r0, s0
+; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: frint_float16_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintx.f16 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <8 x half> @llvm.rint.v8f16(<8 x half> %src)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
+; CHECK-LABEL: fnearbyint_float32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrintr.f32 s7, s3
+; CHECK-NEXT:    vrintr.f32 s6, s2
+; CHECK-NEXT:    vrintr.f32 s5, s1
+; CHECK-NEXT:    vrintr.f32 s4, s0
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> %src)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fnearbyint_float16_t(<8 x half> %src) {
+; CHECK-LABEL: fnearbyint_float16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.u16 r0, q0[0]
+; CHECK-NEXT:    vmov.u16 r1, q0[1]
+; CHECK-NEXT:    vmov s4, r0
+; CHECK-NEXT:    vrintr.f16 s4, s4
+; CHECK-NEXT:    vmov r0, s4
+; CHECK-NEXT:    vmov s4, r1
+; CHECK-NEXT:    vrintr.f16 s4, s4
+; CHECK-NEXT:    vmov r1, s4
+; CHECK-NEXT:    vmov.16 q1[0], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-NEXT:    vmov.16 q1[1], r1
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vrintr.f16 s8, s8
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[2], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[3]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vrintr.f16 s8, s8
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[3], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[4]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vrintr.f16 s8, s8
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[4], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[5]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vrintr.f16 s8, s8
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[5], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[6]
+; CHECK-NEXT:    vmov s8, r0
+; CHECK-NEXT:    vrintr.f16 s8, s8
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 q1[6], r0
+; CHECK-NEXT:    vmov.u16 r0, q0[7]
+; CHECK-NEXT:    vmov s0, r0
+; CHECK-NEXT:    vrintr.f16 s0, s0
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    vmov.16 q1[7], r0
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = call fast <8 x half> @llvm.nearbyint.v8f16(<8 x half> %src)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
+; CHECK-MVE-LABEL: ffloor_float32_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vrintm.f32 s7, s3
+; CHECK-MVE-NEXT:    vrintm.f32 s6, s2
+; CHECK-MVE-NEXT:    vrintm.f32 s5, s1
+; CHECK-MVE-NEXT:    vrintm.f32 s4, s0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: ffloor_float32_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintm.f32 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <4 x float> @llvm.floor.v4f32(<4 x float> %src)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @ffloor_float16_t(<8 x half> %src) {
+; CHECK-MVE-LABEL: ffloor_float16_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
+; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
+; CHECK-MVE-NEXT:    vmov s4, r0
+; CHECK-MVE-NEXT:    vrintm.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vmov s4, r1
+; CHECK-MVE-NEXT:    vrintm.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r1, s4
+; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrintm.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
+; CHECK-MVE-NEXT:    vmov s0, r0
+; CHECK-MVE-NEXT:    vrintm.f16 s0, s0
+; CHECK-MVE-NEXT:    vmov r0, s0
+; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: ffloor_float16_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrintm.f16 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <8 x half> @llvm.floor.v8f16(<8 x half> %src)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
+; CHECK-MVE-LABEL: fround_float32_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vrinta.f32 s7, s3
+; CHECK-MVE-NEXT:    vrinta.f32 s6, s2
+; CHECK-MVE-NEXT:    vrinta.f32 s5, s1
+; CHECK-MVE-NEXT:    vrinta.f32 s4, s0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: fround_float32_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrinta.f32 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <4 x float> @llvm.round.v4f32(<4 x float> %src)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fround_float16_t(<8 x half> %src) {
+; CHECK-MVE-LABEL: fround_float16_t:
+; CHECK-MVE:       @ %bb.0: @ %entry
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
+; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
+; CHECK-MVE-NEXT:    vmov s4, r0
+; CHECK-MVE-NEXT:    vrinta.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r0, s4
+; CHECK-MVE-NEXT:    vmov s4, r1
+; CHECK-MVE-NEXT:    vrinta.f16 s4, s4
+; CHECK-MVE-NEXT:    vmov r1, s4
+; CHECK-MVE-NEXT:    vmov.16 q1[0], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-MVE-NEXT:    vmov.16 q1[1], r1
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[2], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[3]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[3], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[4]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[4], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[5]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[5], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[6]
+; CHECK-MVE-NEXT:    vmov s8, r0
+; CHECK-MVE-NEXT:    vrinta.f16 s8, s8
+; CHECK-MVE-NEXT:    vmov r0, s8
+; CHECK-MVE-NEXT:    vmov.16 q1[6], r0
+; CHECK-MVE-NEXT:    vmov.u16 r0, q0[7]
+; CHECK-MVE-NEXT:    vmov s0, r0
+; CHECK-MVE-NEXT:    vrinta.f16 s0, s0
+; CHECK-MVE-NEXT:    vmov r0, s0
+; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
+; CHECK-MVE-NEXT:    vmov q0, q1
+; CHECK-MVE-NEXT:    bx lr
+;
+; CHECK-MVEFP-LABEL: fround_float16_t:
+; CHECK-MVEFP:       @ %bb.0: @ %entry
+; CHECK-MVEFP-NEXT:    vrinta.f16 q0, q0
+; CHECK-MVEFP-NEXT:    bx lr
+entry:
+  %0 = call fast <8 x half> @llvm.round.v8f16(<8 x half> %src)
+  ret <8 x half> %0
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
+declare <4 x float> @llvm.rint.v4f32(<4 x float>)
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
+declare <4 x float> @llvm.round.v4f32(<4 x float>)
+declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
+declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
+declare <8 x half> @llvm.rint.v8f16(<8 x half>)
+declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
+declare <8 x half> @llvm.floor.v8f16(<8 x half>)
+declare <8 x half> @llvm.round.v8f16(<8 x half>)

From 701bf714dbcab718067deaf4f343ce3e872b8578 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sat, 13 Jul 2019 14:48:54 +0000
Subject: [PATCH 041/451] [ARM] MVE integer min and max

This simply makes the MVE integer min and max instructions legal and adds the
relevant patterns for them.

Differential Revision: https://reviews.llvm.org/D64026

llvm-svn: 366004
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |   4 +
 llvm/lib/Target/ARM/ARMInstrMVE.td      |  30 ++++++
 llvm/test/CodeGen/Thumb2/mve-minmax.ll  | 134 ++++++++++++++++++++++++
 3 files changed, 168 insertions(+)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 2d8fadb724119..e538353fc7669 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -250,6 +250,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::SMIN, VT, Legal);
+    setOperationAction(ISD::SMAX, VT, Legal);
+    setOperationAction(ISD::UMIN, VT, Legal);
+    setOperationAction(ISD::UMAX, VT, Legal);
 
     // No native support for these.
     setOperationAction(ISD::UDIV, VT, Expand);
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index a6cc8cee65f1d..bc02fdae97b81 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -907,6 +907,36 @@ multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> {
 defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>;
 defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>;
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+
+  def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+}
+
 // end of mve_comp instructions
 
 // start of mve_imm_shift instructions
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
index 38648afaabfa0..38990d3571744 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
@@ -2,6 +2,140 @@
 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
 
+define arm_aapcs_vfpcc <16 x i8> @smin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
+; CHECK-LABEL: smin_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.s8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp slt <16 x i8> %s1, %s2
+  %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2
+  ret <16 x i8> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @smin_v8i16(<8 x i16> %s1, <8 x i16> %s2) {
+; CHECK-LABEL: smin_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.s16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp slt <8 x i16> %s1, %s2
+  %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @smin_v4i32(<4 x i32> %s1, <4 x i32> %s2) {
+; CHECK-LABEL: smin_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.s32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp slt <4 x i32> %s1, %s2
+  %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
+; CHECK-LABEL: umin_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ult <16 x i8> %s1, %s2
+  %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2
+  ret <16 x i8> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @umin_v8i16(<8 x i16> %s1, <8 x i16> %s2) {
+; CHECK-LABEL: umin_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.u16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ult <8 x i16> %s1, %s2
+  %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @umin_v4i32(<4 x i32> %s1, <4 x i32> %s2) {
+; CHECK-LABEL: umin_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmin.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ult <4 x i32> %s1, %s2
+  %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2
+  ret <4 x i32> %1
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
+; CHECK-LABEL: smax_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.s8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp sgt <16 x i8> %s1, %s2
+  %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2
+  ret <16 x i8> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @smax_v8i16(<8 x i16> %s1, <8 x i16> %s2) {
+; CHECK-LABEL: smax_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.s16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp sgt <8 x i16> %s1, %s2
+  %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @smax_v4i32(<4 x i32> %s1, <4 x i32> %s2) {
+; CHECK-LABEL: smax_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.s32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp sgt <4 x i32> %s1, %s2
+  %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2
+  ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
+; CHECK-LABEL: umax_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ugt <16 x i8> %s1, %s2
+  %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2
+  ret <16 x i8> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @umax_v8i16(<8 x i16> %s1, <8 x i16> %s2) {
+; CHECK-LABEL: umax_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.u16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ugt <8 x i16> %s1, %s2
+  %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2
+  ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @umax_v4i32(<4 x i32> %s1, <4 x i32> %s2) {
+; CHECK-LABEL: umax_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmax.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ugt <4 x i32> %s1, %s2
+  %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2
+  ret <4 x i32> %1
+}
+
+
 define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-MVE-LABEL: maxnm_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry

From 4ce648b5e84cdbfbc1d386166bda27892f3aabba Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sat, 13 Jul 2019 14:58:32 +0000
Subject: [PATCH 042/451] [ARM] MVE integer abs

Similar to floating point abs, we also have instructions for integers.

Differential Revision: https://reviews.llvm.org/D64027

llvm-svn: 366005
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |  1 +
 llvm/lib/Target/ARM/ARMInstrMVE.td      |  9 ++++++
 llvm/test/CodeGen/Thumb2/mve-abs.ll     | 38 +++++++++++++++++++++++++
 3 files changed, 48 insertions(+)
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-abs.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e538353fc7669..e8526d1f31ccc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -254,6 +254,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::SMAX, VT, Legal);
     setOperationAction(ISD::UMIN, VT, Legal);
     setOperationAction(ISD::UMAX, VT, Legal);
+    setOperationAction(ISD::ABS, VT, Legal);
 
     // No native support for these.
     setOperationAction(ISD::UDIV, VT, Expand);
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index bc02fdae97b81..e261b74fbf800 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2189,6 +2189,15 @@ def MVE_VABSs8  : MVE_VABSNEG_int<"vabs", "s8",  0b00, 0b0>;
 def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
 def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
+            (v16i8 (MVE_VABSs8 $v))>;
+  def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
+            (v8i16 (MVE_VABSs16 $v))>;
+  def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
+            (v4i32 (MVE_VABSs32 $v))>;
+}
+
 def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
 def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
 def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll
new file mode 100644
index 0000000000000..e0af56abf123b
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @abs_v16i8(<16 x i8> %s1) {
+; CHECK-LABEL: abs_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vabs.s8 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp slt <16 x i8> %s1, zeroinitializer
+  %1 = sub nsw <16 x i8> zeroinitializer, %s1
+  %2 = select <16 x i1> %0, <16 x i8> %1, <16 x i8> %s1
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @abs_v8i16(<8 x i16> %s1) {
+; CHECK-LABEL: abs_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vabs.s16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp slt <8 x i16> %s1, zeroinitializer
+  %1 = sub nsw <8 x i16> zeroinitializer, %s1
+  %2 = select <8 x i1> %0, <8 x i16> %1, <8 x i16> %s1
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @abs_v4i32(<4 x i32> %s1) {
+; CHECK-LABEL: abs_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vabs.s32 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp slt <4 x i32> %s1, zeroinitializer
+  %1 = sub nsw <4 x i32> zeroinitializer, %s1
+  %2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1
+  ret <4 x i32> %2
+}

From 07a7ec202144ecde55016c3334651c72e77a9cce Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sat, 13 Jul 2019 15:26:51 +0000
Subject: [PATCH 043/451] [ARM] MVE VNEG instruction patterns

This selects integer VNEG instructions, which can be especially useful with shifts.

Differential Revision: https://reviews.llvm.org/D64204

llvm-svn: 366006
---
 llvm/lib/Target/ARM/ARMInstrMVE.td  |  9 ++++++++
 llvm/test/CodeGen/Thumb2/mve-neg.ll | 32 +++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-neg.ll

diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index e261b74fbf800..dcee5d749bb11 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2202,6 +2202,15 @@ def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
 def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
 def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
+            (v16i8 (MVE_VNEGs8 $v))>;
+  def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
+            (v8i16 (MVE_VNEGs16 $v))>;
+  def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
+            (v4i32 (MVE_VNEGs32 $v))>;
+}
+
 class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
                    bit negate, list<dag> pattern=[]>
   : MVEIntSingleSrc<iname, suffix, size, pattern> {
diff --git a/llvm/test/CodeGen/Thumb2/mve-neg.ll b/llvm/test/CodeGen/Thumb2/mve-neg.ll
new file mode 100644
index 0000000000000..f1c4352e3edb0
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-neg.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @neg_v16i8(<16 x i8> %s1) {
+; CHECK-LABEL: neg_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s8 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub nsw <16 x i8> zeroinitializer, %s1
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @neg_v8i16(<8 x i16> %s1) {
+; CHECK-LABEL: neg_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub nsw <8 x i16> zeroinitializer, %s1
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @neg_v4i32(<4 x i32> %s1) {
+; CHECK-LABEL: neg_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s32 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub nsw <4 x i32> zeroinitializer, %s1
+  ret <4 x i32> %0
+}

From f6ce7ddecbc593a3911eb119f84e4b437aad8536 Mon Sep 17 00:00:00 2001
From: Mike Spertus <mike@spertus.com>
Date: Sat, 13 Jul 2019 15:27:53 +0000
Subject: [PATCH 044/451] Template-related improvements to Visual Studio
 visualizers

llvm-svn: 366007
---
 clang/utils/ClangVisualizers/clang.natvis | 55 ++++++++++++++++++++---
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/clang/utils/ClangVisualizers/clang.natvis b/clang/utils/ClangVisualizers/clang.natvis
index 42b674e7d7bb2..8b2bf49b41bb9 100644
--- a/clang/utils/ClangVisualizers/clang.natvis
+++ b/clang/utils/ClangVisualizers/clang.natvis
@@ -196,7 +196,11 @@ For later versions of Visual Studio, no setup is required-->
     <DisplayString IncludeView="MaybeEllipses" Condition="TypeForDecl == nullptr">(not yet known if parameter pack) </DisplayString>
     <DisplayString IncludeView="MaybeEllipses" Condition="((TemplateTypeParmType *)(((clang::ExtQualsTypeCommonBase *)(((uintptr_t)TypeForDecl->CanonicalType.Value.Value) &amp; ~(uintptr_t)((1 &lt;&lt; 4) - 1)))-&gt;BaseType))->CanTTPTInfo.ParameterPack">...</DisplayString>
     <DisplayString IncludeView="MaybeEllipses" Condition="!((TemplateTypeParmType *)(((clang::ExtQualsTypeCommonBase *)(((uintptr_t)TypeForDecl->CanonicalType.Value.Value) &amp; ~(uintptr_t)((1 &lt;&lt; 4) - 1)))-&gt;BaseType))->CanTTPTInfo.ParameterPack"></DisplayString>
-    <DisplayString>{*this,view(TorC)} {*this,view(MaybeEllipses)}{Name,view(cpp)}</DisplayString> 
+    <DisplayString IncludeView="DefaultArg" Condition="(DefaultArgument.ValueOrInherited.Val.Value&amp;3LL) == 0">{(TypeSourceInfo *)(DefaultArgument.ValueOrInherited.Val.Value&amp;~3LL),view(cpp)}</DisplayString>
+    <DisplayString IncludeView="DefaultArg">{{InheritedInitializer}}</DisplayString>
+    <DisplayString IncludeView="Initializer" Condition="DefaultArgument.ValueOrInherited.Val.Value&amp;~3LL">= {this,view(DefaultArg)na}</DisplayString>
+    <DisplayString IncludeView="Initializer"></DisplayString>
+    <DisplayString>{*this,view(TorC)} {*this,view(MaybeEllipses)}{Name,view(cpp)} {this,view(Initializer)na}</DisplayString> 
   </Type>
   <Type Name="clang::TemplateDecl">
     <DisplayString>template{TemplateParams,na} {*TemplatedDecl};</DisplayString>
@@ -288,7 +292,7 @@ For later versions of Visual Studio, no setup is required-->
     </Expand>
   </Type>
   <Type Name="clang::SubstTemplateTypeParmType">
-    <DisplayString>{*Replaced,view(cpp)} &lt;= {CanonicalType,view(cpp)}</DisplayString>
+    <DisplayString>{{{*Replaced,view(cpp)} &lt;= {CanonicalType,view(cpp)}}}</DisplayString>
     <Expand>
       <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
       <Item Name="Replaced">*Replaced</Item>
@@ -353,7 +357,7 @@ For later versions of Visual Studio, no setup is required-->
     </Expand>
   </Type>
   <Type Name="clang::InjectedClassNameType">
-    <DisplayString>{InjectedType,view(cpp)}</DisplayString>
+    <DisplayString>{Decl,view(cpp)}</DisplayString>
     <Expand>
       <Item Name="Decl">Decl</Item>
       <Item Name="InjectedType">InjectedType</Item>
@@ -424,12 +428,21 @@ For later versions of Visual Studio, no setup is required-->
       <ExpandedItem>Ty</ExpandedItem>
     </Expand>
   </Type>
-  <Type Name="clang::TemplateArgumentLoc">
-    <DisplayString>{Argument}</DisplayString>
+  <Type Name="clang::TypeLoc">
+    <DisplayString>{(QualType *)&amp;Ty,na}</DisplayString>
     <Expand>
-      <ExpandedItem>Argument</ExpandedItem>
+      <Item Name="Ty">(QualType *)&amp;Ty</Item>
+      <Item Name="Data">Data</Item>
     </Expand>
   </Type>
+  <Type Name="clang::TypeLocBuilder">
+    <DisplayString Optional="true" Condition="LastTy.Value.Value==0">Not building anything</DisplayString>
+    <DisplayString Optional="true">Building a {LastTy}</DisplayString>
+  </Type>
+  <Type Name="clang::TemplateArgumentLoc">
+    <DisplayString IncludeView="cpp">{Argument,view(cpp)}</DisplayString>
+    <DisplayString>{Argument}</DisplayString>
+  </Type>
   <Type Name="clang::TemplateArgument">
     <DisplayString IncludeView="cpp" Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Type">{*(clang::QualType *)&amp;TypeOrValue.V,view(cpp)}</DisplayString>
     <DisplayString Condition="Integer.Kind == clang::TemplateArgument::ArgKind::Type">{(clang::TemplateArgument::ArgKind)TypeOrValue.Kind,en} template argument: {*(clang::QualType *)&amp;TypeOrValue.V}</DisplayString>
@@ -459,6 +472,21 @@ For later versions of Visual Studio, no setup is required-->
       <!-- TODO: Other kinds-->
     </Expand>
   </Type>
+  <Type Name="clang::TemplateArgumentListInfo">
+    <DisplayString IncludeView ="elt0" Condition="Arguments.Size == 0"></DisplayString>
+    <DisplayString IncludeView ="elt0">{((TemplateArgumentLoc*)Arguments.BeginX)[0],view(cpp)}{*this,view(elt1)}</DisplayString>
+    <DisplayString IncludeView ="elt1" Condition="Arguments.Size == 1"></DisplayString>
+    <DisplayString IncludeView ="elt1">, {((TemplateArgumentLoc*)Arguments.BeginX)[1],view(cpp)}{*this,view(elt2)}</DisplayString>
+    <DisplayString IncludeView ="elt2" Condition="Arguments.Size == 2"></DisplayString>
+    <DisplayString IncludeView ="elt2">, {((TemplateArgumentLoc*)Arguments.BeginX)[2],view(cpp)}{*this,view(elt3)}</DisplayString>
+    <DisplayString IncludeView ="elt3" Condition="Arguments.Size == 3"></DisplayString>
+    <DisplayString IncludeView ="elt3">, {((TemplateArgumentLoc*)Arguments.BeginX)[3],view(cpp)}{*this,view(elt4)}</DisplayString>
+    <DisplayString IncludeView ="elt4" Condition="Arguments.Size == 4"></DisplayString>
+    <DisplayString IncludeView ="elt4">, ...</DisplayString>
+    <DisplayString Condition="Arguments.Size == 0">empty</DisplayString>
+    <DisplayString Condition="Arguments.Size != 0">&lt;{*this,view(elt0)}&gt;</DisplayString>
+    <DisplayString>Uninitialized</DisplayString>
+  </Type>
   <Type Name="clang::TemplateArgumentList">
     <DisplayString IncludeView="arg0" Condition="NumArguments==0"></DisplayString>
     <DisplayString IncludeView="arg0">{Arguments[0],view(cpp)}{*this,view(arg1)}</DisplayString>
@@ -483,6 +511,17 @@ For later versions of Visual Studio, no setup is required-->
     <DisplayString IncludeView="arg2" Condition="Length==2"></DisplayString>
     <DisplayString IncludeView="arg2">, {Data[2],view(cpp)}, ...</DisplayString>
     <DisplayString>&lt;{*this,view(arg0)}&gt;</DisplayString>
+    <Expand>
+      <Item Name="Length">Length</Item>
+      <Synthetic Name="Data">
+        <Expand>
+          <ArrayItems>
+            <Size>Length</Size>
+            <ValuePointer>Data</ValuePointer>
+          </ArrayItems>
+        </Expand>
+      </Synthetic>
+    </Expand>
   </Type>
   <Type Name="clang::MultiLevelTemplateArgumentList">
     <DisplayString IncludeView="level0" Condition="(llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.EndX - (llvm::ArrayRef&lt;clang::TemplateArgument&gt; *)TemplateArgumentLists.BeginX==0"></DisplayString>
@@ -558,6 +597,7 @@ For later versions of Visual Studio, no setup is required-->
         <Size>TemplateSpecializationTypeBits.NumArgs</Size>
         <ValuePointer>(clang::TemplateArgument *)(this+1)</ValuePointer>
       </ArrayItems>
+      <ExpandedItem>*(clang::Type *)this, view(cmn)</ExpandedItem>
     </Expand>
   </Type>
   <Type Name="clang::DeducedType">
@@ -745,7 +785,7 @@ For later versions of Visual Studio, no setup is required-->
     <DisplayString>{this,view(retType)nand} {Name,view(cpp)nd}({*this,view(parm0)nd})</DisplayString>
     <Expand>
       <ExpandedItem>(clang::DeclaratorDecl *)this,nd</ExpandedItem>
-      <Item Name="ReturnType">*this,view(retType)nd</Item>
+      <Item Name="ReturnType">((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)(((uintptr_t)DeclType.Value.Value) &amp; ~15))-&gt;BaseType)->ResultType</Item>
       <Synthetic Name="Parameter Types">
         <DisplayString>{*this,view(parm0)nd}</DisplayString>
         <Expand>
@@ -755,6 +795,7 @@ For later versions of Visual Studio, no setup is required-->
           </ArrayItems>
         </Expand>
       </Synthetic>
+      <Item Name="TemplateOrSpecialization">TemplateOrSpecialization</Item>
     </Expand>
   </Type>
   <Type Name="clang::OpaquePtr&lt;*&gt;">

From 458a720ec17234f01d6e92e557436187e6f32c4b Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sat, 13 Jul 2019 15:43:00 +0000
Subject: [PATCH 045/451] [ARM] Add sign and zero extend patterns for MVE

The vmovlb instructions can be used to sign or zero extend vector registers
between types. This adds some patterns for them and relevant testing. The
VBICIMM generation is also put behind a hasNEON check (as is already done for
VORRIMM).

Code originally by David Sherwood.

Differential Revision: https://reviews.llvm.org/D64069

llvm-svn: 366008
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |  2 +-
 llvm/lib/Target/ARM/ARMInstrMVE.td      | 17 +++++
 llvm/test/CodeGen/Thumb2/mve-sext.ll    | 93 +++++++++++++++++++++++++
 3 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-sext.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e8526d1f31ccc..5e2de61e288f9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -11180,7 +11180,7 @@ static SDValue PerformANDCombine(SDNode *N,
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (BVN &&
+  if (BVN && Subtarget->hasNEON() &&
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VbicVT;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index dcee5d749bb11..10ed876f484a0 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1002,6 +1002,23 @@ defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
 defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
 defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
+            (MVE_VMOVLs16bh MQPR:$src)>;
+  def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8),
+            (MVE_VMOVLs8bh MQPR:$src)>;
+  def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
+            (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
+
+  // zext_inreg 16 -> 32 (0xCFF: VMOV modified-imm encoding of mask 0x0000FFFF)
+  def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
+            (MVE_VMOVLu16bh MQPR:$src)>;
+  // zext_inreg 8 -> 16 (0x8FF: VMOV modified-imm encoding of mask 0x00FF)
+  def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
+            (MVE_VMOVLu8bh MQPR:$src)>;
+}
+
+
 class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
                     dag immops, list<dag> pattern=[]>
   : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
diff --git a/llvm/test/CodeGen/Thumb2/mve-sext.ll b/llvm/test/CodeGen/Thumb2/mve-sext.ll
new file mode 100644
index 0000000000000..9458fdc47e582
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-sext.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) {
+; CHECK-LABEL: sext_v8i8_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmovlb.s8 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sext <8 x i8> %src to <8 x i16>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @sext_v4i16_v4i32(<4 x i16> %src) {
+; CHECK-LABEL: sext_v4i16_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmovlb.s16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sext <4 x i16> %src to <4 x i32>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @sext_v4i8_v4i32(<4 x i8> %src) {
+; CHECK-LABEL: sext_v4i8_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmovlb.s8 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sext <4 x i8> %src to <4 x i32>
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {
+; CHECK-LABEL: zext_v8i8_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmovlb.u8 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext <8 x i8> %src to <8 x i16>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @zext_v4i16_v4i32(<4 x i16> %src) {
+; CHECK-LABEL: zext_v4i16_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmovlb.u16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext <4 x i16> %src to <4 x i32>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @zext_v4i8_v4i32(<4 x i8> %src) {
+; CHECK-LABEL: zext_v4i8_v4i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i32 q1, #0xff
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext <4 x i8> %src to <4 x i32>
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {
+; CHECK-LABEL: trunc_v8i16_v8i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = trunc <8 x i16> %src to <8 x i8>
+  ret <8 x i8> %0
+}
+
+define arm_aapcs_vfpcc <4 x i16> @trunc_v4i32_v4i16(<4 x i32> %src) {
+; CHECK-LABEL: trunc_v4i32_v4i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = trunc <4 x i32> %src to <4 x i16>
+  ret <4 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i8> @trunc_v4i32_v4i8(<4 x i32> %src) {
+; CHECK-LABEL: trunc_v4i32_v4i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = trunc <4 x i32> %src to <4 x i8>
+  ret <4 x i8> %0
+}

From e0363adb75f593b23300991c7b8a47b16c228be0 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Sat, 13 Jul 2019 17:01:00 +0000
Subject: [PATCH 046/451] [Attributor][Fix] Never override given argument
 numbers

llvm-svn: 366009
---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 88b6af3abbd3d..9e8a680d82900 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -182,8 +182,8 @@ struct Attributor {
     // Determine the argument number automatically for llvm::Arguments if none
     // is set. Do not override a given one as it could be a use of the argument
     // in a call site.
-    if (auto *Arg = dyn_cast<Argument>(&V))
-      if (ArgNo == -1)
+    if (ArgNo == -1)
+      if (auto *Arg = dyn_cast<Argument>(&V))
         ArgNo = Arg->getArgNo();
 
     // If a function was given together with an argument number, perform the
@@ -232,10 +232,13 @@ struct Attributor {
                   "'AbstractAttribute'!");
 
     // Determine the anchor value and the argument number which are used to
-    // lookup the attribute together with AAType::ID.
+    // lookup the attribute together with AAType::ID. If passed an argument,
+    // use its argument number but do not override a given one as it could be a
+    // use of the argument at a call site.
     Value &AnchoredVal = AA.getAnchoredValue();
-    if (auto *Arg = dyn_cast<Argument>(&AnchoredVal))
-      ArgNo = Arg->getArgNo();
+    if (ArgNo == -1)
+      if (auto *Arg = dyn_cast<Argument>(&AnchoredVal))
+        ArgNo = Arg->getArgNo();
 
     // Put the attribute in the lookup map structure and the container we use to
     // keep track of all attributes.

From 124cae7d3fc5249a22ab309c48262413865db266 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Sat, 13 Jul 2019 19:49:39 +0000
Subject: [PATCH 047/451] Remove extra ';' to silence compiler warning.

- Plus extra style formatting.

llvm-svn: 366010
---
 clang/lib/Sema/SemaDecl.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 73407afb49f34..1324abb04a741 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -11082,7 +11082,8 @@ bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit,
   return VDecl->isInvalidDecl();
 }
 
-void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init, SourceLocation Loc) {
+void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init,
+                                              SourceLocation Loc) {
   if (auto *CE = dyn_cast<ConstantExpr>(Init))
     Init = CE->getSubExpr();
 
@@ -11113,7 +11114,7 @@ void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init, SourceLocation L
     if (InitType.hasNonTrivialToPrimitiveCopyCUnion())
       checkNonTrivialCUnion(InitType, Loc, NTCUC_CopyInit, NTCUK_Copy);
   }
-};
+}
 
 namespace {
 

From 9450b0084af2cb0d4273e8accfdb3f11b947206e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 14 Jul 2019 04:13:33 +0000
Subject: [PATCH 048/451] [X86] Remove offset of 8 from the call to FuseInst
 for UNPCKLPDrr folding added in r365287.

This was copy/pasted from above and I forgot to change it. We just
need the default offset of 0 here.

Fixes PR42616.

llvm-svn: 366011
---
 llvm/lib/Target/X86/X86InstrInfo.cpp |  2 +-
 llvm/test/CodeGen/X86/pr42616.ll     | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr42616.ll

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 38b66d8c8af93..e5d3a09c291b9 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4641,7 +4641,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
       unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
       if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) {
         MachineInstr *NewMI =
-            FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this, 8);
+            FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
         return NewMI;
       }
     }
diff --git a/llvm/test/CodeGen/X86/pr42616.ll b/llvm/test/CodeGen/X86/pr42616.ll
new file mode 100644
index 0000000000000..961d99b207d62
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr42616.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 | FileCheck %s
+
+define <2 x double> @pr42616(<2 x double> %a0, <2 x double> %a1, <2 x double>* %p) {
+  ;CHECK-LABEL: pr42616
+  ;CHECK:       movhpd (%esp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+  %2 = load <2 x double>, <2 x double>* %p, align 1
+  %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 2, i32 0>
+  %4 = fadd <2 x double> %a0, %3
+  ret <2 x double> %4
+}

From c7f9559d659f5c889cd19921b57f0201109e494d Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sun, 14 Jul 2019 06:46:46 +0000
Subject: [PATCH 049/451] [Driver] Simplify -lgcc & -lgcc_s

gcc defaults to -shared-libgcc in C++ mode.
Letting getLibGccType() return SharedLibGcc simplifies the logic.

llvm-svn: 366012
---
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 30 ++++++++++------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index d0c9d7d396272..b6a0afd7e5ecc 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1135,16 +1135,12 @@ bool tools::isObjCAutoRefCount(const ArgList &Args) {
 
 enum class LibGccType { UnspecifiedLibGcc, StaticLibGcc, SharedLibGcc };
 
-static LibGccType getLibGccType(const ArgList &Args) {
-  bool Static = Args.hasArg(options::OPT_static_libgcc) ||
-                Args.hasArg(options::OPT_static) ||
-                Args.hasArg(options::OPT_static_pie);
-
-  bool Shared = Args.hasArg(options::OPT_shared_libgcc);
-  if (Shared)
-    return LibGccType::SharedLibGcc;
-  if (Static)
+static LibGccType getLibGccType(const Driver &D, const ArgList &Args) {
+  if (Args.hasArg(options::OPT_static_libgcc) ||
+      Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_pie))
     return LibGccType::StaticLibGcc;
+  if (Args.hasArg(options::OPT_shared_libgcc) || D.CCCIsCXX())
+    return LibGccType::SharedLibGcc;
   return LibGccType::UnspecifiedLibGcc;
 }
 
@@ -1170,8 +1166,8 @@ static void AddUnwindLibrary(const ToolChain &TC, const Driver &D,
       UNW == ToolChain::UNW_None)
     return;
 
-  LibGccType LGT = getLibGccType(Args);
-  bool AsNeeded = D.CCCIsCC() && LGT == LibGccType::UnspecifiedLibGcc &&
+  LibGccType LGT = getLibGccType(D, Args);
+  bool AsNeeded = LGT == LibGccType::UnspecifiedLibGcc &&
                   !TC.getTriple().isAndroid() && !TC.getTriple().isOSCygMing();
   if (AsNeeded)
     CmdArgs.push_back("--as-needed");
@@ -1180,11 +1176,11 @@ static void AddUnwindLibrary(const ToolChain &TC, const Driver &D,
   case ToolChain::UNW_None:
     return;
   case ToolChain::UNW_Libgcc: {
-    LibGccType LGT = getLibGccType(Args);
-    if (LGT == LibGccType::UnspecifiedLibGcc || LGT == LibGccType::SharedLibGcc)
-      CmdArgs.push_back("-lgcc_s");
-    else if (LGT == LibGccType::StaticLibGcc)
+    LibGccType LGT = getLibGccType(D, Args);
+    if (LGT == LibGccType::StaticLibGcc)
       CmdArgs.push_back("-lgcc_eh");
+    else
+      CmdArgs.push_back("-lgcc_s");
     break;
   }
   case ToolChain::UNW_CompilerRT:
@@ -1200,7 +1196,7 @@ static void AddLibgcc(const ToolChain &TC, const Driver &D,
                       ArgStringList &CmdArgs, const ArgList &Args) {
   bool isAndroid = TC.getTriple().isAndroid();
 
-  LibGccType LGT = getLibGccType(Args);
+  LibGccType LGT = getLibGccType(D, Args);
   bool LibGccFirst = (D.CCCIsCC() && LGT == LibGccType::UnspecifiedLibGcc) ||
                      LGT == LibGccType::StaticLibGcc;
   if (LibGccFirst)
@@ -1216,7 +1212,7 @@ static void AddLibgcc(const ToolChain &TC, const Driver &D,
   //
   // NOTE: This fixes a link error on Android MIPS as well.  The non-static
   // libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl.
-  if (isAndroid && getLibGccType(Args) != LibGccType::StaticLibGcc)
+  if (isAndroid && getLibGccType(D, Args) != LibGccType::StaticLibGcc)
     CmdArgs.push_back("-ldl");
 }
 

From e9dc9c2bebb1a7503bf71befe7d8c4daae964c1e Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Sun, 14 Jul 2019 07:16:13 +0000
Subject: [PATCH 050/451] [Driver] Simplify AddLibgcc

llvm-svn: 366013
---
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index b6a0afd7e5ecc..99691cb43dc42 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1194,17 +1194,11 @@ static void AddUnwindLibrary(const ToolChain &TC, const Driver &D,
 
 static void AddLibgcc(const ToolChain &TC, const Driver &D,
                       ArgStringList &CmdArgs, const ArgList &Args) {
-  bool isAndroid = TC.getTriple().isAndroid();
-
   LibGccType LGT = getLibGccType(D, Args);
-  bool LibGccFirst = (D.CCCIsCC() && LGT == LibGccType::UnspecifiedLibGcc) ||
-                     LGT == LibGccType::StaticLibGcc;
-  if (LibGccFirst)
+  if (LGT != LibGccType::SharedLibGcc)
     CmdArgs.push_back("-lgcc");
-
   AddUnwindLibrary(TC, D, CmdArgs, Args);
-
-  if (!LibGccFirst)
+  if (LGT == LibGccType::SharedLibGcc)
     CmdArgs.push_back("-lgcc");
 
   // According to Android ABI, we have to link with libdl if we are
@@ -1212,7 +1206,7 @@ static void AddLibgcc(const ToolChain &TC, const Driver &D,
   //
   // NOTE: This fixes a link error on Android MIPS as well.  The non-static
   // libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl.
-  if (isAndroid && getLibGccType(D, Args) != LibGccType::StaticLibGcc)
+  if (TC.getTriple().isAndroid() && LGT != LibGccType::StaticLibGcc)
     CmdArgs.push_back("-ldl");
 }
 

From 44a4bf95ddf16cfc9cad36a5fd1936fd67a9b06a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 14 Jul 2019 11:10:04 +0000
Subject: [PATCH 051/451] [MachineOutliner] Add missing initializers for
 OutlinedFunction. NFCI.

Appeases MSVC/cppcheck.

llvm-svn: 366014
---
 llvm/include/llvm/CodeGen/MachineOutliner.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index 377df4e18a2a0..3868fa4155799 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -171,13 +171,13 @@ struct OutlinedFunction {
 
   /// Represents the size of a sequence in bytes. (Some instructions vary
   /// widely in size, so just counting the instructions isn't very useful.)
-  unsigned SequenceSize;
+  unsigned SequenceSize = 0;
 
   /// Target-defined overhead of constructing a frame for this function.
-  unsigned FrameOverhead;
+  unsigned FrameOverhead = 0;
 
   /// Target-defined identifier for constructing a frame for this function.
-  unsigned FrameConstructionID;
+  unsigned FrameConstructionID = 0;
 
   /// Return the number of candidates for this \p OutlinedFunction.
   unsigned getOccurrenceCount() const { return Candidates.size(); }

From a3f9240bfbbf5d6f2fb1bc329e00f2c20328d3d9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 14 Jul 2019 11:41:52 +0000
Subject: [PATCH 052/451] SlotIndexes - add missing initializer. NFCI.

llvm-svn: 366015
---
 llvm/include/llvm/CodeGen/SlotIndexes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h
index 10ab4cca8319a..2b32a4d30dff2 100644
--- a/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -347,7 +347,7 @@ class raw_ostream;
   public:
     static char ID;
 
-    SlotIndexes() : MachineFunctionPass(ID) {
+    SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) {
       initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     }
 

From f66f5ff38ab25043aed6e379b27a298196e764b9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 14 Jul 2019 11:47:36 +0000
Subject: [PATCH 053/451] VirtRegMap - add missing initializers. NFCI.

llvm-svn: 366016
---
 llvm/include/llvm/CodeGen/VirtRegMap.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h
index 7a64d674ecac0..70eb048f05ebe 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -67,8 +67,10 @@ class TargetInstrInfo;
   public:
     static char ID;
 
-    VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
-                   Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
+    VirtRegMap()
+        : MachineFunctionPass(ID), MRI(nullptr), TII(nullptr), TRI(nullptr),
+          MF(nullptr), Virt2PhysMap(NO_PHYS_REG),
+          Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {}
     VirtRegMap(const VirtRegMap &) = delete;
     VirtRegMap &operator=(const VirtRegMap &) = delete;
 

From 864474c9c72a647e1d9bc7546df86103ce043f4f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sun, 14 Jul 2019 12:35:50 +0000
Subject: [PATCH 054/451] [BitcodeReader] Use tighter upper bound to validate
 forward references.

At the moment, bitcode files with invalid forward references can easily
cause the bitcode reader to run out of memory, by creating a forward
reference with a very high index.

We can use the size of the bitcode file as an upper bound, because a
valid bitcode file can never contain more records. This should be
sufficient to fail early in most cases. The only exception is large
files with invalid forward references close to the file size.

There are a couple of clusterfuzz runs that fail with out-of-memory
because of very high forward references and they should be fixed by this
patch.

A concrete example for this is D64507, which causes out-of-memory on
systems with low memory, like the hexagon upstream bots.

Reviewers: t.p.northover, thegameg, jfb, efriedma, hfinkel

Reviewed By: jfb

Differential Revision: https://reviews.llvm.org/D64577

llvm-svn: 366017
---
 llvm/include/llvm/Bitstream/BitstreamReader.h |  8 ++++++--
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     |  2 +-
 llvm/lib/Bitcode/Reader/MetadataLoader.cpp    | 20 +++++++++++++++----
 llvm/lib/Bitcode/Reader/ValueList.cpp         |  8 ++++++--
 llvm/lib/Bitcode/Reader/ValueList.h           |  9 ++++++++-
 llvm/test/Bitcode/pr18704.ll                  |  2 +-
 6 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Bitstream/BitstreamReader.h b/llvm/include/llvm/Bitstream/BitstreamReader.h
index ccb4a492b9d56..ee82e7ec1ba23 100644
--- a/llvm/include/llvm/Bitstream/BitstreamReader.h
+++ b/llvm/include/llvm/Bitstream/BitstreamReader.h
@@ -294,6 +294,9 @@ class SimpleBitstreamCursor {
     BitsInCurWord = 0;
   }
 
+  /// Return the size of the stream in bytes.
+  size_t SizeInBytes() const { return BitcodeBytes.size(); }
+
   /// Skip to the end of the file.
   void skipToEnd() { NextChar = BitcodeBytes.size(); }
 };
@@ -364,17 +367,18 @@ class BitstreamCursor : SimpleBitstreamCursor {
   explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
       : SimpleBitstreamCursor(BitcodeBytes) {}
 
-  using SimpleBitstreamCursor::canSkipToPos;
   using SimpleBitstreamCursor::AtEndOfStream;
+  using SimpleBitstreamCursor::canSkipToPos;
+  using SimpleBitstreamCursor::fillCurWord;
   using SimpleBitstreamCursor::getBitcodeBytes;
   using SimpleBitstreamCursor::GetCurrentBitNo;
   using SimpleBitstreamCursor::getCurrentByteNo;
   using SimpleBitstreamCursor::getPointerToByte;
   using SimpleBitstreamCursor::JumpToBit;
-  using SimpleBitstreamCursor::fillCurWord;
   using SimpleBitstreamCursor::Read;
   using SimpleBitstreamCursor::ReadVBR;
   using SimpleBitstreamCursor::ReadVBR64;
+  using SimpleBitstreamCursor::SizeInBytes;
 
   /// Return the number of bits used to encode an abbrev #.
   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 09bd0f4ec71cd..d07edefcffacc 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -858,7 +858,7 @@ BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
                              StringRef ProducerIdentification,
                              LLVMContext &Context)
     : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context),
-      ValueList(Context) {
+      ValueList(Context, Stream.SizeInBytes()) {
   this->ProducerIdentification = ProducerIdentification;
 }
 
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 24620ed10d747..108f71189585f 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -130,8 +130,15 @@ class BitcodeReaderMetadataList {
 
   LLVMContext &Context;
 
+  /// Maximum number of valid references. Forward references exceeding the
+  /// maximum must be invalid.
+  unsigned RefsUpperBound;
+
 public:
-  BitcodeReaderMetadataList(LLVMContext &C) : Context(C) {}
+  BitcodeReaderMetadataList(LLVMContext &C, size_t RefsUpperBound)
+      : Context(C),
+        RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
+                                RefsUpperBound)) {}
 
   // vector compatibility methods
   unsigned size() const { return MetadataPtrs.size(); }
@@ -218,6 +225,10 @@ void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) {
 }
 
 Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
+  // Bail out for a clearly invalid value.
+  if (Idx >= RefsUpperBound)
+    return nullptr;
+
   if (Idx >= size())
     resize(Idx + 1);
 
@@ -625,9 +636,10 @@ class MetadataLoader::MetadataLoaderImpl {
                      BitcodeReaderValueList &ValueList,
                      std::function<Type *(unsigned)> getTypeByID,
                      bool IsImporting)
-      : MetadataList(TheModule.getContext()), ValueList(ValueList),
-        Stream(Stream), Context(TheModule.getContext()), TheModule(TheModule),
-        getTypeByID(std::move(getTypeByID)), IsImporting(IsImporting) {}
+      : MetadataList(TheModule.getContext(), Stream.SizeInBytes()),
+        ValueList(ValueList), Stream(Stream), Context(TheModule.getContext()),
+        TheModule(TheModule), getTypeByID(std::move(getTypeByID)),
+        IsImporting(IsImporting) {}
 
   Error parseMetadata(bool ModuleLevel);
 
diff --git a/llvm/lib/Bitcode/Reader/ValueList.cpp b/llvm/lib/Bitcode/Reader/ValueList.cpp
index da2d24d103b20..431995fd40ac7 100644
--- a/llvm/lib/Bitcode/Reader/ValueList.cpp
+++ b/llvm/lib/Bitcode/Reader/ValueList.cpp
@@ -97,6 +97,10 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx, Type *FullTy) {
 }
 
 Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
+  // Bail out for a clearly invalid value.
+  if (Idx >= RefsUpperBound)
+    return nullptr;
+
   if (Idx >= size())
     resize(Idx + 1);
 
@@ -114,8 +118,8 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
 
 Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty,
                                               Type **FullTy) {
-  // Bail out for a clearly invalid value. This would make us call resize(0)
-  if (Idx == std::numeric_limits<unsigned>::max())
+  // Bail out for a clearly invalid value.
+  if (Idx >= RefsUpperBound)
     return nullptr;
 
   if (Idx >= size())
diff --git a/llvm/lib/Bitcode/Reader/ValueList.h b/llvm/lib/Bitcode/Reader/ValueList.h
index 1c54911650fe8..49900498c2944 100644
--- a/llvm/lib/Bitcode/Reader/ValueList.h
+++ b/llvm/lib/Bitcode/Reader/ValueList.h
@@ -46,8 +46,15 @@ class BitcodeReaderValueList {
   ResolveConstantsTy ResolveConstants;
   LLVMContext &Context;
 
+  /// Maximum number of valid references. Forward references exceeding the
+  /// maximum must be invalid.
+  unsigned RefsUpperBound;
+
 public:
-  BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+  BitcodeReaderValueList(LLVMContext &C, size_t RefsUpperBound)
+      : Context(C),
+        RefsUpperBound(std::min((size_t)std::numeric_limits<unsigned>::max(),
+                                RefsUpperBound)) {}
 
   ~BitcodeReaderValueList() {
     assert(ResolveConstants.empty() && "Constants not resolved?");
diff --git a/llvm/test/Bitcode/pr18704.ll b/llvm/test/Bitcode/pr18704.ll
index e57ce3cec4c05..1f1abfaab9478 100644
--- a/llvm/test/Bitcode/pr18704.ll
+++ b/llvm/test/Bitcode/pr18704.ll
@@ -1,6 +1,6 @@
 ; RUN:  not llvm-dis < %s.bc 2>&1 | FileCheck %s
 
-; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Never resolved value found in function
+; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Invalid record
 
 ; pr18704.ll.bc has an instruction referring to invalid type.
 ; The test checks that LLVM reports the error and doesn't access freed memory

From 19d3fdb08b722a4a66b21b3e08d2008c95f968e8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sun, 14 Jul 2019 14:06:25 +0000
Subject: [PATCH 055/451] Recommit "[BitcodeReader] Validate OpNum, before
 accessing Record array."

This recommits r365750 (git commit 8b222ecf2769ee133691f208f6166ce118c4a164)

Original message:

   Currently invalid bitcode files can cause a crash, when OpNum exceeds
   the number of elements in Record, like in the attached bitcode file.

   The test case was generated by clusterfuzz: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=15698

   Reviewers: t.p.northover, thegameg, jfb

   Reviewed By: jfb

   Differential Revision: https://reviews.llvm.org/D64507

   llvm-svn: 365750

llvm-svn: 366018
---
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp      |   4 ++++
 llvm/test/Bitcode/Inputs/invalid-fcmp-opnum.bc | Bin 0 -> 908 bytes
 llvm/test/Bitcode/invalid.test                 |   5 +++++
 3 files changed, 9 insertions(+)
 create mode 100644 llvm/test/Bitcode/Inputs/invalid-fcmp-opnum.bc

diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index d07edefcffacc..6cad3b94e5e74 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -4171,6 +4171,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
           popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS))
         return error("Invalid record");
 
+      if (OpNum >= Record.size())
+        return error(
+            "Invalid record: operand number exceeded available operands");
+
       unsigned PredVal = Record[OpNum];
       bool IsFP = LHS->getType()->isFPOrFPVectorTy();
       FastMathFlags FMF;
diff --git a/llvm/test/Bitcode/Inputs/invalid-fcmp-opnum.bc b/llvm/test/Bitcode/Inputs/invalid-fcmp-opnum.bc
new file mode 100644
index 0000000000000000000000000000000000000000..454a14b8611ac37baabb23893b20fa275fe4b2fd
GIT binary patch
literal 908
zcmXX_O=uHO6rb%T*|m+6)do#;ciHU+<lx7WSWMjHkIC|viWO4)KtZu75~>H;rXfu$
zNYY)I=s(ITJ>=lQfd7NH96WdsX_8t)34S9zlwxQMBDDlRz=NQ(tq<Ps&AfSU=Dpv{
zM3lLe0Ve=f0KlkS>Ki)yw6*^BpS`DUG1eOPi*^8B;<c_05p-bNjg`vw-U-(%M{SHZ
zaF@{S{Z`t|TmBo(F#mQ$?si3V#_ROcu4A3vNN&(@3d)h(|G@2yGzX!vt!F+O9?s?)
z+Q*v~@rdn)0Kitfr?-dRB3!&~>F#-;V_4OThI9K+F5ji=Lrh;U=m$5mnZQgYclkDO
zw&*#W0(OaZcR}eBXmj#cj3ptO+y#KJg9J(J^U)c0zQM+wL@1~D^=dwnp{CN*L}A?C
z0*)^X{GNe6dxWo)xfew?E?GCiwB2<wR8vBEGE`6JlPYzUrltmws#4P`nMxy7K<@fB
zxMskH<hUX^R@<;0g+6AUTM^kMk*mb`xO78)qRFq6z#}sFL<{PqTyHy5Bh=L~>fjhk
zV9X%ZK<WsU@=&RtR=n+G23?@hxC<o;XsVFiv)+d5AK;HUxF(UKwIbUrvh@vIj<Jg(
zJD=qiW48B(5~wMG!UjIB`ezk?j+AGs`|+W_X5@FIT&miiUqb2`lu)(g7?mDDNf%1q
zNA4FV9P7aGH42*sTs`b)2yhK|A$YjuGG7VvZ$x%E%P)Umn+@(W!N*<0{*rR&o+j6$
zeSaGnL6d1JT^JtvZa7Q^_{;oajQu2XD`oB)!J7q(Tsvkvd0q3@t8z&RVM2aYf=xxv
z;b}GhtP^*NCUHG9HSM7ih3vUi2CmJ)k5Twd>Mg$s^UcQ2r3SY`@a7dibm6y%ZC7W)
zgZZMS&!`0@gxfADfmhX;7Q0Z~`YCK)XG`QZ>j1AA0Bksiss|rv@;yur8M?0pF;Pa9
z!2Xy301g4&mdGB{l30@iQ9|!OJYbF|Y?a?w91AMx&6P`pXaQ39ut0irk}&g6Gb%;^
w#)ZvXFJyXjP&St~<UIaE<{RBk&hCgG+$ZgmCQ?@fta}qaukS!0aC|fF4}u^iz5oCK

literal 0
HcmV?d00001

diff --git a/llvm/test/Bitcode/invalid.test b/llvm/test/Bitcode/invalid.test
index 2a9af0626c64e..d1f9d7c087455 100644
--- a/llvm/test/Bitcode/invalid.test
+++ b/llvm/test/Bitcode/invalid.test
@@ -235,3 +235,8 @@ RUN: not llvm-dis -disable-output %p/Inputs/invalid-nonpointer-atomicrmw.bc 2>&1
 RUN:   FileCheck --check-prefix=NONPOINTER-ATOMICRMW %s
 
 NONPOINTER-ATOMICRMW: Invalid record
+
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-fcmp-opnum.bc 2>&1 | \
+RUN:   FileCheck --check-prefix=INVALID-FCMP-OPNUM %s
+
+INVALID-FCMP-OPNUM: Invalid record: operand number exceeded available operands

From 03d5e28fe9438c327b49c67d3022a31ff2b53dc9 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sun, 14 Jul 2019 14:08:39 +0000
Subject: [PATCH 056/451] [x86] add test for sub-with-flags opportunity
 (PR40483); NFC

llvm-svn: 366019
---
 llvm/test/CodeGen/X86/combine-sbb.ll | 47 +++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
index 9e68ab4beb16b..bba72c5660943 100644
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -280,7 +280,7 @@ define i32 @PR40483_sub4(i32*, i32) nounwind {
 
 ; Verify that a bogus cmov is simplified.
 
-define i32 @PR40483_sub5(i32*, i32) {
+define i32 @PR40483_sub5(i32*, i32) nounwind {
 ; X86-LABEL: PR40483_sub5:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -306,4 +306,49 @@ define i32 @PR40483_sub5(i32*, i32) {
   ret i32 %10
 }
 
+define i32 @PR40483_sub6(i32*, i32) nounwind {
+; X86-LABEL: PR40483_sub6:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl (%edx), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %esi, %ecx
+; X86-NEXT:    subl %edi, %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl %edi, %esi
+; X86-NEXT:    movl %esi, (%edx)
+; X86-NEXT:    jae .LBB8_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    addl %ecx, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:  .LBB8_2:
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    retl
+;
+; X64-LABEL: PR40483_sub6:
+; X64:       # %bb.0:
+; X64-NEXT:    movl (%rdi), %ecx
+; X64-NEXT:    movl %ecx, %edx
+; X64-NEXT:    subl %esi, %edx
+; X64-NEXT:    addl %edx, %edx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    subl %esi, %ecx
+; X64-NEXT:    movl %ecx, (%rdi)
+; X64-NEXT:    cmovbl %edx, %eax
+; X64-NEXT:    retq
+  %3 = load i32, i32* %0, align 8
+  %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1)
+  %5 = extractvalue { i8, i32 } %4, 1
+  store i32 %5, i32* %0, align 8
+  %6 = extractvalue { i8, i32 } %4, 0
+  %7 = icmp eq i8 %6, 0
+  %8 = sub i32 %3, %1
+  %9 = add i32 %8, %5
+  %10 = select i1 %7, i32 0, i32 %9
+  ret i32 %10
+}
+
 declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)

From 34c067331e465bb992693de7c6c4f9ee46f6de49 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 14 Jul 2019 15:05:05 +0000
Subject: [PATCH 057/451] [Hashing] hash_1to3_bytes - avoid trunc(v + zext(x))
 NFCI.

MSVC complains about the extension to uint64_t for an addition followed by truncation back to uint32_t - add an explicit uint32_t cast to avoid this.

llvm-svn: 366020
---
 llvm/include/llvm/ADT/Hashing.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h
index f639aa2054c4f..008188bfa2109 100644
--- a/llvm/include/llvm/ADT/Hashing.h
+++ b/llvm/include/llvm/ADT/Hashing.h
@@ -191,7 +191,7 @@ inline uint64_t hash_1to3_bytes(const char *s, size_t len, uint64_t seed) {
   uint8_t b = s[len >> 1];
   uint8_t c = s[len - 1];
   uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
-  uint32_t z = len + (static_cast<uint32_t>(c) << 2);
+  uint32_t z = static_cast<uint32_t>(len) + (static_cast<uint32_t>(c) << 2);
   return shift_mix(y * k2 ^ z * k3 ^ seed) * k2;
 }
 

From 57190b3974fbc07cdb5b61aeaf023de94c2afe7e Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Sun, 14 Jul 2019 15:55:32 +0000
Subject: [PATCH 058/451] [InstCombine] Add assume context test; NFC

Baseline test for D37215.

llvm-svn: 366021
---
 .../InstCombine/assume_inevitable.ll          | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/assume_inevitable.ll

diff --git a/llvm/test/Transforms/InstCombine/assume_inevitable.ll b/llvm/test/Transforms/InstCombine/assume_inevitable.ll
new file mode 100644
index 0000000000000..a063775b05729
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/assume_inevitable.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check that assume is propagated backwards through all
+; operations that are `isGuaranteedToTransferExecutionToSuccessor`
+; (it should reach the load and mark it as `align 32`).
+define i32 @assume_inevitable(i32* %a, i32* %b, i8* %c) {
+; CHECK-LABEL: @assume_inevitable(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[LOADRES:%.*]] = load i32, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    [[LOADRES2:%.*]] = call i32 @llvm.annotation.i32(i32 [[LOADRES]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i32 2)
+; CHECK-NEXT:    store i32 [[LOADRES2]], i32* [[A]], align 4
+; CHECK-NEXT:    [[DUMMY_EQ:%.*]] = icmp ugt i32 [[LOADRES]], 42
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[DUMMY_EQ]])
+; CHECK-NEXT:    [[M_I8:%.*]] = bitcast i64* [[M]] to i8*
+; CHECK-NEXT:    [[M_A:%.*]] = call i8* @llvm.ptr.annotation.p0i8(i8* nonnull [[M_I8]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i32 2)
+; CHECK-NEXT:    [[M_X:%.*]] = bitcast i8* [[M_A]] to i64*
+; CHECK-NEXT:    [[OBJSZ:%.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* [[C:%.*]], i1 false, i1 false, i1 false)
+; CHECK-NEXT:    store i64 [[OBJSZ]], i64* [[M_X]], align 4
+; CHECK-NEXT:    [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64
+; CHECK-NEXT:    [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
+; CHECK-NEXT:    [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[MASKCOND]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %dummy = alloca i8, align 4
+  %m = alloca i64
+  %0 = load i32, i32* %a, align 4
+
+  ; START perform a bunch of inevitable operations
+  %loadres = load i32, i32* %b
+  %loadres2 = call i32 @llvm.annotation.i32(i32 %loadres, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2)
+  store i32 %loadres2, i32* %a
+
+  %dummy_eq = icmp ugt i32 %loadres, 42
+  tail call void @llvm.assume(i1 %dummy_eq)
+
+  call void @llvm.lifetime.start.p0i8(i64 1, i8* %dummy)
+  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %dummy)
+  call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %dummy)
+  call void @llvm.lifetime.end.p0i8(i64 1, i8* %dummy)
+
+  %m_i8 = bitcast i64* %m to i8*
+  %m_a = call i8* @llvm.ptr.annotation.p0i8(i8* %m_i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2)
+  %m_x = bitcast i8* %m_a to i64*
+  %objsz = call i64 @llvm.objectsize.i64.p0i8(i8* %c, i1 false)
+  store i64 %objsz, i64* %m_x
+  ; END perform a bunch of inevitable operations
+
+  ; AND here's the assume:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+
+  ret i32 %0
+}
+
+@.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata"
+@.str1 = private unnamed_addr constant [4 x i8] c"t.c\00", section "llvm.metadata"
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
+declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture)
+declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture)
+declare void @llvm.assume(i1)

From 194b337f3327691ee88007061c71225c934b3af0 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Sun, 14 Jul 2019 18:21:15 +0000
Subject: [PATCH 059/451] Avoid eager template instantiation caused by the
 variant narrowing checks.

The standard disallows narrowing conversions when constructing a variant.
This is checked by attempting to perform braced initialization of the
destination type from the argument type. However, braced initialization
can force the compiler (mostly clang) to eagerly instantiate the
constructors of the destination type -- which can lead to errors in
a non-immediate context.

However, as variant is currently specified, the narrowing checks only
observably apply when the destination type is arithmetic, meaning we can
skip the check for class types and hence avoid the hard errors.

In order to cause fewer build breakages, this patch avoids the narrowing
check except when the destination type is arithmetic.

llvm-svn: 366022
---
 libcxx/include/variant                        | 36 ++++++++++++++-----
 .../variant.variant/variant.ctor/T.pass.cpp   | 14 +++++++-
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/libcxx/include/variant b/libcxx/include/variant
index 420e8c2611f67..21871ae3c1231 100644
--- a/libcxx/include/variant
+++ b/libcxx/include/variant
@@ -1095,19 +1095,39 @@ struct __overload;
 template <>
 struct __overload<> { void operator()() const; };
 
+
+
+struct __no_narrowing_check {
+  template <class _Dest, class _Source>
+  using _Apply = __identity<_Dest>;
+};
+
+struct __narrowing_check {
+  template <class _Dest>
+  static auto __test_impl(_Dest (&&)[1]) -> __identity<_Dest>;
+  template <class _Dest, class _Source>
+  using _Apply = decltype(__test_impl<_Dest>({std::declval<_Source>()}));
+};
+
+template <class _Dest, class _Source>
+using __check_for_narrowing = typename _If<
+#ifdef _LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT
+    false &&
+#endif
+    is_arithmetic<_Dest>::value,
+    __narrowing_check,
+    __no_narrowing_check
+    >::template _Apply<_Dest, _Source>;
+
+
 template <class _Tp, class... _Types>
 struct __overload<_Tp, _Types...> : __overload<_Types...> {
   using __overload<_Types...>::operator();
 
-  static auto __test(_Tp (&&)[1]) -> __identity<_Tp>;
-
   template <class _Up>
-  auto operator()(_Tp, _Up&& __t) const
-#ifndef _LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT
-      -> decltype(__test({ _VSTD::forward<_Up>(__t) }));
-#else
-      -> __identity<_Tp>;
-#endif
+  auto operator()(_Tp, _Up&&) const ->
+
+  __check_for_narrowing<_Tp, _Up>;
 };
 
 template <class _Base, class _Tp>
diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
index 55f8d11c1159b..ef07482858133 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
@@ -177,10 +177,22 @@ void test_T_ctor_basic() {
 #endif
 }
 
+struct BoomOnInt {
+  template <class T>
+  constexpr BoomOnInt(T) { static_assert(!std::is_same<T, int>::value, ""); }
+};
+
+void test_no_narrowing_check_for_class_types() {
+  using V = std::variant<int, BoomOnInt>;
+  V v(42);
+  assert(v.index() == 0);
+  assert(std::get<0>(v) == 42);
+}
+
 int main(int, char**) {
   test_T_ctor_basic();
   test_T_ctor_noexcept();
   test_T_ctor_sfinae();
-
+  test_no_narrowing_check_for_class_types();
   return 0;
 }

From 8eb86a15c5cfdcf5052e507916c65b52d4ca086e Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Sun, 14 Jul 2019 18:24:19 +0000
Subject: [PATCH 060/451] [GitSVN][NFC] Mark dry-run commits as such in the log
 output

Summary: This helps to avoid worries about the "dry run flag" while testing.

Reviewers: jyknight, rnk, mehdi_amini

Subscribers: bollu, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64697

llvm-svn: 366023
---
 llvm/utils/git-svn/git-llvm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/git-svn/git-llvm b/llvm/utils/git-svn/git-llvm
index 399e55247b553..289898d15b5fb 100755
--- a/llvm/utils/git-svn/git-llvm
+++ b/llvm/utils/git-svn/git-llvm
@@ -420,8 +420,8 @@ def cmd_push(args):
     rev_range = args.rev_range
     dry_run = args.dry_run
     revs = get_revs_to_push(rev_range)
-    log('Pushing %d %s commit%s:\n%s' %
-        (len(revs),
+    log('%sPushing %d %s commit%s:\n%s' %
+        ('[DryRun] ' if dry_run else '', len(revs),
          'split-repo (%s)' % split_repo_path
          if split_repo_path else 'monorepo',
          's' if len(revs) != 1 else '',

From 8f1d7d1c55c76d315fb766128b8174389dbd46f5 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Sun, 14 Jul 2019 18:25:09 +0000
Subject: [PATCH 061/451] consistency in the release notes

llvm-svn: 366024
---
 clang/docs/ReleaseNotes.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f0a35050dde08..f89447fc96a3c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -184,8 +184,8 @@ AST Matchers
 clang-format
 ------------
 
-- Add language support for clang-formatting C# files
-- Add Microsoft coding style to encapsulate default C# formatting style
+- Add language support for clang-formatting C# files.
+- Add Microsoft coding style to encapsulate default C# formatting style.
 - Added new option `PPDIS_BeforeHash` (in configuration: `BeforeHash`) to
   `IndentPPDirectives` which indents preprocessor directives before the hash.
 - Added new option `AlignConsecutiveMacros` to align the C/C++ preprocessor

From 273857d1ea9a39ab976b6dda446b8564e88d068a Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Sun, 14 Jul 2019 18:30:34 +0000
Subject: [PATCH 062/451] Harden variant test added in r366022

The test was brittle since it only went boom for one specific type, when
really it should go boom for all of them.

llvm-svn: 366025
---
 .../variant/variant.variant/variant.ctor/T.pass.cpp         | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
index ef07482858133..d05e800b302ea 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
@@ -177,13 +177,13 @@ void test_T_ctor_basic() {
 #endif
 }
 
-struct BoomOnInt {
+struct BoomOnAnything {
   template <class T>
-  constexpr BoomOnInt(T) { static_assert(!std::is_same<T, int>::value, ""); }
+  constexpr BoomOnAnything(T) { static_assert(!std::is_same<T, T>::value, ""); }
 };
 
 void test_no_narrowing_check_for_class_types() {
-  using V = std::variant<int, BoomOnInt>;
+  using V = std::variant<int, BoomOnAnything>;
   V v(42);
   assert(v.index() == 0);
   assert(std::get<0>(v) == 42);

From aae0cb67ed79bb7dddcda3438afd1263104cc689 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Sun, 14 Jul 2019 18:31:55 +0000
Subject: [PATCH 063/451] Cleanup whitespace in <variant>. NFC.

llvm-svn: 366026
---
 libcxx/include/variant | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/libcxx/include/variant b/libcxx/include/variant
index 21871ae3c1231..88a625df71e1c 100644
--- a/libcxx/include/variant
+++ b/libcxx/include/variant
@@ -1089,14 +1089,6 @@ private:
   }
 };
 
-template <class... _Types>
-struct __overload;
-
-template <>
-struct __overload<> { void operator()() const; };
-
-
-
 struct __no_narrowing_check {
   template <class _Dest, class _Source>
   using _Apply = __identity<_Dest>;
@@ -1120,14 +1112,18 @@ using __check_for_narrowing = typename _If<
     >::template _Apply<_Dest, _Source>;
 
 
+template <class... _Types>
+struct __overload;
+
+template <>
+struct __overload<> { void operator()() const; };
+
 template <class _Tp, class... _Types>
 struct __overload<_Tp, _Types...> : __overload<_Types...> {
   using __overload<_Types...>::operator();
 
   template <class _Up>
-  auto operator()(_Tp, _Up&&) const ->
-
-  __check_for_narrowing<_Tp, _Up>;
+  auto operator()(_Tp, _Up&&) const -> __check_for_narrowing<_Tp, _Up>;
 };
 
 template <class _Base, class _Tp>

From 24cacf9c56f0b55534e98941cc8675a9a7489c37 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas@arm.com>
Date: Sun, 14 Jul 2019 18:32:42 +0000
Subject: [PATCH 064/451] [clang][Driver][ARM] Favor -mfpu over default CPU
 features

When processing the command line options march, mcpu and mfpu, we store
the implied target features in a vector. The change D62998 introduced a
temporary vector, where the processed features get accumulated. When
calling DecodeARMFeaturesFromCPU, which sets the default features for
the specified CPU, we certainly don't want to override the features
that have been explicitly specified on the command line. Therefore, the
default features should appear first in the final vector. This problem
became evident once I added the missing (unhandled) target features in
ARM::getExtensionFeatures.

Differential Revision: https://reviews.llvm.org/D63936

llvm-svn: 366027
---
 clang/lib/Driver/ToolChains/Arch/ARM.cpp      |  6 +++-
 clang/test/CodeGen/arm-target-features.c      |  2 +-
 llvm/include/llvm/Support/ARMTargetParser.def |  1 +
 llvm/lib/Support/ARMTargetParser.cpp          | 30 ++++---------------
 llvm/unittests/Support/TargetParserTest.cpp   | 15 +++++-----
 5 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 2e7562c6ee272..d1db583e52802 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -376,7 +376,11 @@ void arm::getARMTargetFeatures(const ToolChain &TC,
         Features.push_back(
             Args.MakeArgString((F.second ? "+" : "-") + F.first()));
   } else if (!CPUName.empty()) {
-    DecodeARMFeaturesFromCPU(D, CPUName, ExtensionFeatures);
+    // This sets the default features for the specified CPU. We certainly don't
+    // want to override the features that have been explicitly specified on the
+    // command line. Therefore, process them directly instead of appending them
+    // at the end later.
+    DecodeARMFeaturesFromCPU(D, CPUName, Features);
   }
 
   if (CPUArg)
diff --git a/clang/test/CodeGen/arm-target-features.c b/clang/test/CodeGen/arm-target-features.c
index ec165aeec2881..a0fbafc5d0c54 100644
--- a/clang/test/CodeGen/arm-target-features.c
+++ b/clang/test/CodeGen/arm-target-features.c
@@ -32,7 +32,7 @@
 
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82
-// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+d32,+dotprod,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode,+vfp2,+vfp2d16,+vfp2d16sp,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp"
+// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+d32,+dotprod,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode,+vfp2,+vfp2d16,+vfp2d16sp,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp"
 
 // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8-ARM
 // CHECK-BASIC-V8-ARM: "target-features"="+armv8-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+vfp2,+vfp2d16,+vfp2d16sp,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp,-thumb-mode"
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 593480f2f1892..6e17c8cbfd784 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -148,6 +148,7 @@ ARM_ARCH_EXT_NAME("aes",      ARM::AEK_AES,      "+aes",   "-aes")
 ARM_ARCH_EXT_NAME("dotprod",  ARM::AEK_DOTPROD,  "+dotprod","-dotprod")
 ARM_ARCH_EXT_NAME("dsp",      ARM::AEK_DSP,      "+dsp",   "-dsp")
 ARM_ARCH_EXT_NAME("fp",       ARM::AEK_FP,       nullptr,  nullptr)
+ARM_ARCH_EXT_NAME("fp.dp",    ARM::AEK_FP_DP,    nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("mve",      ARM::AEK_SIMD,     "+mve",   "-mve")
 ARM_ARCH_EXT_NAME("mve.fp",   (ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp")
 ARM_ARCH_EXT_NAME("idiv",     (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr)
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
index e91b508eefd51..27d1e5527be47 100644
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -409,30 +409,12 @@ bool ARM::getExtensionFeatures(unsigned Extensions,
   if (Extensions == AEK_INVALID)
     return false;
 
-  if (Extensions & AEK_CRC)
-    Features.push_back("+crc");
-  else
-    Features.push_back("-crc");
-
-  if (Extensions & AEK_DSP)
-    Features.push_back("+dsp");
-  else
-    Features.push_back("-dsp");
-
-  if (Extensions & AEK_FP16FML)
-    Features.push_back("+fp16fml");
-  else
-    Features.push_back("-fp16fml");
-
-  if (Extensions & AEK_RAS)
-    Features.push_back("+ras");
-  else
-    Features.push_back("-ras");
-
-  if (Extensions & AEK_DOTPROD)
-    Features.push_back("+dotprod");
-  else
-    Features.push_back("-dotprod");
+  for (const auto AE : ARCHExtNames) {
+    if ((Extensions & AE.ID) == AE.ID && AE.Feature)
+      Features.push_back(AE.Feature);
+    else if (AE.NegFeature)
+      Features.push_back(AE.NegFeature);
+  }
 
   return getHWDivFeatures(Extensions, Features);
 }
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index b11d9ae7f0061..34c7a8a4fd1c8 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -571,17 +571,18 @@ TEST(TargetParserTest, ARMFPURestriction) {
 TEST(TargetParserTest, ARMExtensionFeatures) {
   std::map<unsigned, std::vector<StringRef>> Extensions;
 
-  Extensions[ARM::AEK_CRC]        = { "+crc",       "-crc" };
-  Extensions[ARM::AEK_DSP]        = { "+dsp",       "-dsp" };
+  for (auto &Ext : ARM::ARCHExtNames) {
+    if (Ext.Feature && Ext.NegFeature)
+      Extensions[Ext.ID] = { StringRef(Ext.Feature),
+                             StringRef(Ext.NegFeature) };
+  }
+
   Extensions[ARM::AEK_HWDIVARM]   = { "+hwdiv-arm", "-hwdiv-arm" };
   Extensions[ARM::AEK_HWDIVTHUMB] = { "+hwdiv",     "-hwdiv" };
-  Extensions[ARM::AEK_RAS]        = { "+ras",       "-ras" };
-  Extensions[ARM::AEK_FP16FML]    = { "+fp16fml",   "-fp16fml" };
-  Extensions[ARM::AEK_DOTPROD]    = { "+dotprod",   "-dotprod" };
 
   std::vector<StringRef> Features;
 
-  EXPECT_FALSE(AArch64::getExtensionFeatures(ARM::AEK_INVALID, Features));
+  EXPECT_FALSE(ARM::getExtensionFeatures(ARM::AEK_INVALID, Features));
 
   for (auto &E : Extensions) {
     // test +extension
@@ -598,7 +599,7 @@ TEST(TargetParserTest, ARMExtensionFeatures) {
     Found = std::find(std::begin(Features), std::end(Features), E.second.at(1));
     EXPECT_TRUE(Found != std::end(Features));
     EXPECT_TRUE(Extensions.size() == Features.size());
-   }
+  }
 }
 
 TEST(TargetParserTest, ARMFPUFeatures) {

From fff5dc0b173fc35ce9a75a737a980875be125566 Mon Sep 17 00:00:00 2001
From: JF Bastien <jfbastien@apple.com>
Date: Sun, 14 Jul 2019 18:33:51 +0000
Subject: [PATCH 065/451] Support __seg_fs and __seg_gs on x86

Summary:
GCC supports named address space macros:
  https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html

clang does as well with address spaces:
  https://clang.llvm.org/docs/LanguageExtensions.html#memory-references-to-specified-segments

Add the __seg_fs and __seg_gs macros for compatibility with GCC.

<rdar://problem/52944935>

Subscribers: jkorous, dexonsmith, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64676

llvm-svn: 366028
---
 clang/docs/LanguageExtensions.rst       | 4 ++++
 clang/lib/Basic/Targets/X86.cpp         | 5 +++++
 clang/test/Preprocessor/x86_seg_fs_gs.c | 7 +++++++
 3 files changed, 16 insertions(+)
 create mode 100644 clang/test/Preprocessor/x86_seg_fs_gs.c

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index ecbf04c3c822a..266309c6ce248 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -2465,6 +2465,10 @@ Which compiles to (on X86-32):
           movl    %gs:(%eax), %eax
           ret
 
+You can also use the GCC compatibility macros ``__seg_fs`` and ``__seg_gs`` for
+the same purpose. The preprocessor symbols ``__SEG_FS`` and ``__SEG_GS``
+indicate their support.
+
 PowerPC Language Extensions
 ------------------------------
 
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 76d8ab8146ffb..d618c90b05c02 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -917,6 +917,11 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     DefineStd(Builder, "i386", Opts);
   }
 
+  Builder.defineMacro("__SEG_GS");
+  Builder.defineMacro("__SEG_FS");
+  Builder.defineMacro("__seg_gs", "__attribute__((address_space(256)))");
+  Builder.defineMacro("__seg_fs", "__attribute__((address_space(257)))");
+
   // Subtarget options.
   // FIXME: We are hard-coding the tune parameters based on the CPU, but they
   // truly should be based on -mtune options.
diff --git a/clang/test/Preprocessor/x86_seg_fs_gs.c b/clang/test/Preprocessor/x86_seg_fs_gs.c
new file mode 100644
index 0000000000000..b7a586c820245
--- /dev/null
+++ b/clang/test/Preprocessor/x86_seg_fs_gs.c
@@ -0,0 +1,7 @@
+// RUN: %clang -target i386-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s
+// RUN: %clang -target x86_64-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s
+
+// CHECK: #define __SEG_FS 1
+// CHECK: #define __SEG_GS 1
+// CHECK: #define __seg_fs __attribute__((address_space(257)))
+// CHECK: #define __seg_gs __attribute__((address_space(256)))

From 8111807a03c7ecc340fe2d8497b422b09e111fe9 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 14 Jul 2019 19:13:09 +0000
Subject: [PATCH 066/451] Fix uninitialized variable analyzer warning. NFCI.

llvm-svn: 366029
---
 clang/lib/Sema/SemaDecl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 1324abb04a741..ee7950de45e73 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12192,7 +12192,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) {
 
   // Cache the result of checking for constant initialization.
   Optional<bool> CacheHasConstInit;
-  const Expr *CacheCulprit;
+  const Expr *CacheCulprit = nullptr;
   auto checkConstInit = [&]() mutable {
     if (!CacheHasConstInit)
       CacheHasConstInit = var->getInit()->isConstantInitializer(

From 9428d95ce7f84844a076fe13219db96a78e3bd44 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sun, 14 Jul 2019 20:12:36 +0000
Subject: [PATCH 067/451] [LV] Exclude loop-invariant inputs from scalar cost
 computation.

Loop invariant operands do not need to be scalarized, as we are using
the values outside the loop. We should ignore them when computing the
scalarization overhead.

Fixes PR41294

Reviewers: hsaito, rengolin, dcaballe, Ayal

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D59995

llvm-svn: 366030
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  64 ++++++----
 .../extractvalue-no-scalarization-required.ll | 109 ++++++++++++++++++
 2 files changed, 151 insertions(+), 22 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c1bb43bc5bdb8..22cf9c7db9490 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1179,7 +1179,7 @@ class LoopVectorizationCostModel {
   /// VF. Return the cost of the instruction, including scalarization overhead
   /// if it's needed. The flag NeedToScalarize shows if the call needs to be
   /// scalarized -
-  // i.e. either vector version isn't available, or is too expensive.
+  /// i.e. either vector version isn't available, or is too expensive.
   unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
 
 private:
@@ -1332,6 +1332,30 @@ class LoopVectorizationCostModel {
 
   DecisionList WideningDecisions;
 
+  /// Returns true if \p V is expected to be vectorized and it needs to be
+  /// extracted.
+  bool needsExtract(Value *V, unsigned VF) const {
+    Instruction *I = dyn_cast<Instruction>(V);
+    if (VF == 1 || !I || !TheLoop->contains(I) || TheLoop->isLoopInvariant(I))
+      return false;
+
+    // Assume we can vectorize V (and hence we need extraction) if the
+    // scalars are not computed yet. This can happen, because it is called
+    // via getScalarizationOverhead from setCostBasedWideningDecision, before
+    // the scalars are collected. That should be a safe assumption in most
+    // cases, because we check if the operands have vectorizable types
+    // beforehand in LoopVectorizationLegality.
+    return Scalars.find(VF) == Scalars.end() ||
+           !isScalarAfterVectorization(I, VF);
+  };
+
+  /// Returns a range containing only operands needing to be extracted.
+  SmallVector<Value *, 4> filterExtractingOperands(Instruction::op_range Ops,
+                                                   unsigned VF) {
+    return SmallVector<Value *, 4>(make_filter_range(
+        Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
+  }
+
 public:
   /// The loop that we evaluate.
   Loop *TheLoop;
@@ -3125,8 +3149,11 @@ unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
   if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
     FMF = FPMO->getFastMathFlags();
 
-  SmallVector<Value *, 4> Operands(CI->arg_operands());
-  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
+  // Skip operands that do not require extraction/scalarization and do not incur
+  // any overhead.
+  return TTI.getIntrinsicInstrCost(
+      ID, CI->getType(), filterExtractingOperands(CI->arg_operands(), VF), FMF,
+      VF);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
@@ -5346,15 +5373,6 @@ int LoopVectorizationCostModel::computePredInstDiscount(
     return true;
   };
 
-  // Returns true if an operand that cannot be scalarized must be extracted
-  // from a vector. We will account for this scalarization overhead below. Note
-  // that the non-void predicated instructions are placed in their own blocks,
-  // and their return values are inserted into vectors. Thus, an extract would
-  // still be required.
-  auto needsExtract = [&](Instruction *I) -> bool {
-    return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF);
-  };
-
   // Compute the expected cost discount from scalarizing the entire expression
   // feeding the predicated instruction. We currently only consider expressions
   // that are single-use instruction chains.
@@ -5394,7 +5412,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
                "Instruction has non-scalar type");
         if (canBeScalarized(J))
           Worklist.push_back(J);
-        else if (needsExtract(J))
+        else if (needsExtract(J, VF))
           ScalarCost += TTI.getScalarizationOverhead(
                               ToVectorTy(J->getType(),VF), false, true);
       }
@@ -5684,16 +5702,18 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
   if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
     return Cost;
 
-  if (CallInst *CI = dyn_cast<CallInst>(I)) {
-    SmallVector<const Value *, 4> Operands(CI->arg_operands());
-    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  } else if (!isa<StoreInst>(I) ||
-             !TTI.supportsEfficientVectorElementLoadStore()) {
-    SmallVector<const Value *, 4> Operands(I->operand_values());
-    Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
-  }
+  // Some targets support efficient element stores.
+  if (isa<StoreInst>(I) && TTI.supportsEfficientVectorElementLoadStore())
+    return Cost;
 
-  return Cost;
+  // Collect operands to consider.
+  CallInst *CI = dyn_cast<CallInst>(I);
+  Instruction::op_range Ops = CI ? CI->arg_operands() : I->operands();
+
+  // Skip operands that do not require extraction/scalarization and do not incur
+  // any overhead.
+  return Cost + TTI.getOperandsScalarizationOverhead(
+                    filterExtractingOperands(Ops, VF), VF);
 }
 
 void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
new file mode 100644
index 0000000000000..c3ad5b078ae0b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -0,0 +1,109 @@
+; REQUIRES: asserts
+
+; RUN: opt -loop-vectorize -mtriple=arm64-apple-ios %s -S -debug -disable-output 2>&1 | FileCheck --check-prefix=CM %s
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 %s -S | FileCheck --check-prefix=FORCED %s
+
+; Test case from PR41294.
+
+; Check scalar cost for extractvalue. The constant and loop invariant operands are free,
+; leaving cost 3 for scalarizing the result + 2 for executing the op with VF 2.
+
+; CM: LV: Scalar loop costs: 7.
+; CM: LV: Found an estimated cost of 5 for VF 2 For instruction:   %a = extractvalue { i64, i64 } %sv, 0
+; CM-NEXT: LV: Found an estimated cost of 5 for VF 2 For instruction:   %b = extractvalue { i64, i64 } %sv, 1
+
+; Check that the extractvalue operands are actually free in vector code.
+
+; FORCED-LABEL: vector.body:                                      ; preds = %vector.body, %vector.ph
+; FORCED-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0
+; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer
+; FORCED-NEXT:    %induction = add <2 x i32> %broadcast.splat, <i32 0, i32 1>
+; FORCED-NEXT:    %0 = add i32 %index, 0
+; FORCED-NEXT:    %1 = extractvalue { i64, i64 } %sv, 0
+; FORCED-NEXT:    %2 = extractvalue { i64, i64 } %sv, 0
+; FORCED-NEXT:    %3 = insertelement <2 x i64> undef, i64 %1, i32 0
+; FORCED-NEXT:    %4 = insertelement <2 x i64> %3, i64 %2, i32 1
+; FORCED-NEXT:    %5 = extractvalue { i64, i64 } %sv, 1
+; FORCED-NEXT:    %6 = extractvalue { i64, i64 } %sv, 1
+; FORCED-NEXT:    %7 = insertelement <2 x i64> undef, i64 %5, i32 0
+; FORCED-NEXT:    %8 = insertelement <2 x i64> %7, i64 %6, i32 1
+; FORCED-NEXT:    %9 = getelementptr i64, i64* %dst, i32 %0
+; FORCED-NEXT:    %10 = add <2 x i64> %4, %8
+; FORCED-NEXT:    %11 = getelementptr i64, i64* %9, i32 0
+; FORCED-NEXT:    %12 = bitcast i64* %11 to <2 x i64>*
+; FORCED-NEXT:    store <2 x i64> %10, <2 x i64>* %12, align 4
+; FORCED-NEXT:    %index.next = add i32 %index, 2
+; FORCED-NEXT:    %13 = icmp eq i32 %index.next, 0
+; FORCED-NEXT:    br i1 %13, label %middle.block, label %vector.body, !llvm.loop !0
+
+define void @test1(i64* %dst, {i64, i64} %sv) {
+entry:
+  br label %loop.body
+
+loop.body:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
+  %a = extractvalue { i64, i64 } %sv, 0
+  %b = extractvalue { i64, i64 } %sv, 1
+  %addr = getelementptr i64, i64* %dst, i32 %iv
+  %add = add i64 %a, %b
+  store i64 %add, i64* %addr
+  %iv.next = add nsw i32 %iv, 1
+  %cond = icmp ne i32 %iv.next, 0
+  br i1 %cond, label %loop.body, label %exit
+
+exit:
+  ret void
+}
+
+
+; Similar to the test case above, but checks getVectorCallCost as well.
+declare float @pow(float, float) readnone nounwind
+
+; CM: LV: Scalar loop costs: 16.
+; CM: LV: Found an estimated cost of 5 for VF 2 For instruction:   %a = extractvalue { float, float } %sv, 0
+; CM-NEXT: LV: Found an estimated cost of 5 for VF 2 For instruction:   %b = extractvalue { float, float } %sv, 1
+
+; FORCED-LABEL: define void @test_getVectorCallCost
+
+; FORCED-LABEL: vector.body:                                      ; preds = %vector.body, %vector.ph
+; FORCED-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; FORCED-NEXT:    %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0
+; FORCED-NEXT:    %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer
+; FORCED-NEXT:    %induction = add <2 x i32> %broadcast.splat, <i32 0, i32 1>
+; FORCED-NEXT:    %0 = add i32 %index, 0
+; FORCED-NEXT:    %1 = extractvalue { float, float } %sv, 0
+; FORCED-NEXT:    %2 = extractvalue { float, float } %sv, 0
+; FORCED-NEXT:    %3 = insertelement <2 x float> undef, float %1, i32 0
+; FORCED-NEXT:    %4 = insertelement <2 x float> %3, float %2, i32 1
+; FORCED-NEXT:    %5 = extractvalue { float, float } %sv, 1
+; FORCED-NEXT:    %6 = extractvalue { float, float } %sv, 1
+; FORCED-NEXT:    %7 = insertelement <2 x float> undef, float %5, i32 0
+; FORCED-NEXT:    %8 = insertelement <2 x float> %7, float %6, i32 1
+; FORCED-NEXT:    %9 = getelementptr float, float* %dst, i32 %0
+; FORCED-NEXT:    %10 = call <2 x float> @llvm.pow.v2f32(<2 x float> %4, <2 x float> %8)
+; FORCED-NEXT:    %11 = getelementptr float, float* %9, i32 0
+; FORCED-NEXT:    %12 = bitcast float* %11 to <2 x float>*
+; FORCED-NEXT:    store <2 x float> %10, <2 x float>* %12, align 4
+; FORCED-NEXT:    %index.next = add i32 %index, 2
+; FORCED-NEXT:    %13 = icmp eq i32 %index.next, 0
+; FORCED-NEXT:    br i1 %13, label %middle.block, label %vector.body, !llvm.loop !4
+
+define void @test_getVectorCallCost(float* %dst, {float, float} %sv) {
+entry:
+  br label %loop.body
+
+loop.body:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
+  %a = extractvalue { float, float } %sv, 0
+  %b = extractvalue { float, float } %sv, 1
+  %addr = getelementptr float, float* %dst, i32 %iv
+  %p = call float @pow(float %a, float %b)
+  store float %p, float* %addr
+  %iv.next = add nsw i32 %iv, 1
+  %cond = icmp ne i32 %iv.next, 0
+  br i1 %cond, label %loop.body, label %exit
+
+exit:
+  ret void
+}

From 951bb68ce262545bdb0bff536256e0514daf0046 Mon Sep 17 00:00:00 2001
From: Alexandros Lamprineas <alexandros.lamprineas@arm.com>
Date: Sun, 14 Jul 2019 20:31:15 +0000
Subject: [PATCH 068/451] [TargetParser][ARM] Account dependencies when
 processing target features

Teaches ARM::appendArchExtFeatures to account for dependencies when processing
target features: i.e. when you say -march=armv8.1-m.main+mve.fp+nofp it
means mve.fp should get discarded too. (Split from D63936)

Differential Revision: https://reviews.llvm.org/D64048

llvm-svn: 366031
---
 clang/test/Preprocessor/arm-target-features.c | 29 +++++++++++++++----
 llvm/include/llvm/Support/ARMTargetParser.def |  4 +--
 llvm/lib/Support/ARMTargetParser.cpp          | 26 +++++++++++++----
 3 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c
index 95231e2a8125a..df5af4a933762 100644
--- a/clang/test/Preprocessor/arm-target-features.c
+++ b/clang/test/Preprocessor/arm-target-features.c
@@ -762,12 +762,29 @@
 // CHECK-V81M-MVE: #define __ARM_FEATURE_MVE 1
 // CHECK-V81M-MVE: #define __ARM_FEATURE_SIMD32 1
 
-// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVE-FP %s
-// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_DSP 1
-// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
-// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_MVE 3
-// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_SIMD32 1
-// CHECK-V81M-MVE-FP: #define __ARM_FPV5__ 1
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVEFP %s
+// CHECK-V81M-MVEFP: #define __ARM_FEATURE_DSP 1
+// CHECK-V81M-MVEFP: #define __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1
+// CHECK-V81M-MVEFP: #define __ARM_FEATURE_MVE 3
+// CHECK-V81M-MVEFP: #define __ARM_FEATURE_SIMD32 1
+// CHECK-V81M-MVEFP: #define __ARM_FPV5__ 1
+
+// nofp discards mve.fp
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+nofp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVEFP-NOFP %s
+// CHECK-V81M-MVEFP-NOFP-NOT: #define __ARM_FEATURE_MVE
+
+// nomve discards mve.fp
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+nomve -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVEFP-NOMVE %s
+// CHECK-V81M-MVEFP-NOMVE-NOT: #define __ARM_FEATURE_MVE
+
+// mve+fp doesn't imply mve.fp
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve+fp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVE-FP %s
+// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_MVE 1
+
+// nodsp discards both dsp and mve
+// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve+nodsp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVE-NODSP %s
+// CHECK-V81M-MVE-NODSP-NOT: #define __ARM_FEATURE_MVE
+// CHECK-V81M-MVE-NODSP-NOT: #define __ARM_FEATURE_DSP
 
 // RUN: %clang -target armv8.1a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81A %s
 // CHECK-V81A: #define __ARM_ARCH 8
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 6e17c8cbfd784..f466b32527481 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -149,8 +149,8 @@ ARM_ARCH_EXT_NAME("dotprod",  ARM::AEK_DOTPROD,  "+dotprod","-dotprod")
 ARM_ARCH_EXT_NAME("dsp",      ARM::AEK_DSP,      "+dsp",   "-dsp")
 ARM_ARCH_EXT_NAME("fp",       ARM::AEK_FP,       nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("fp.dp",    ARM::AEK_FP_DP,    nullptr,  nullptr)
-ARM_ARCH_EXT_NAME("mve",      ARM::AEK_SIMD,     "+mve",   "-mve")
-ARM_ARCH_EXT_NAME("mve.fp",   (ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp")
+ARM_ARCH_EXT_NAME("mve",     (ARM::AEK_DSP | ARM::AEK_SIMD), "+mve", "-mve")
+ARM_ARCH_EXT_NAME("mve.fp",  (ARM::AEK_DSP | ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp")
 ARM_ARCH_EXT_NAME("idiv",     (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr)
 ARM_ARCH_EXT_NAME("mp",       ARM::AEK_MP,       nullptr,  nullptr)
 ARM_ARCH_EXT_NAME("simd",     ARM::AEK_SIMD,     nullptr,  nullptr)
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
index 27d1e5527be47..be948cfc95d43 100644
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -490,16 +490,30 @@ static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) {
   return ARM::FK_INVALID;
 }
 
+static unsigned getAEKID(StringRef ArchExtName) {
+  for (const auto AE : ARM::ARCHExtNames)
+    if (AE.getName() == ArchExtName)
+      return AE.ID;
+  return ARM::AEK_INVALID;
+}
+
 bool ARM::appendArchExtFeatures(
   StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
   std::vector<StringRef> &Features) {
-  StringRef StandardFeature = getArchExtFeature(ArchExt);
-  if (!StandardFeature.empty()) {
-    Features.push_back(StandardFeature);
-    return true;
-  }
 
+  size_t StartingNumFeatures = Features.size();
   const bool Negated = stripNegationPrefix(ArchExt);
+  unsigned ID = getAEKID(ArchExt);
+
+  if (ID == AEK_INVALID)
+    return false;
+
+  for (const auto AE : ARCHExtNames) {
+    if (Negated && (AE.ID & ID) == ID && AE.NegFeature)
+      Features.push_back(AE.NegFeature);
+    else if (AE.ID == ID && AE.Feature)
+      Features.push_back(AE.Feature);
+  }
 
   if (CPU == "")
     CPU = "generic";
@@ -519,7 +533,7 @@ bool ARM::appendArchExtFeatures(
     }
     return ARM::getFPUFeatures(FPUKind, Features);
   }
-  return false;
+  return StartingNumFeatures != Features.size();
 }
 
 StringRef ARM::getHWDivName(unsigned HWDivKind) {

From 3c0e2bb0cba205166f85811140da0f6c17ae8280 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Sun, 14 Jul 2019 20:59:51 +0000
Subject: [PATCH 069/451] Add test for variant construction with duplicate
 types.

llvm-svn: 366032
---
 .../variant/variant.variant/variant.ctor/T.pass.cpp  | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
index d05e800b302ea..42a31f3345075 100644
--- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
+++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp
@@ -189,10 +189,22 @@ void test_no_narrowing_check_for_class_types() {
   assert(std::get<0>(v) == 42);
 }
 
+struct Bar {};
+struct Baz {};
+void test_construction_with_repeated_types() {
+  using V = std::variant<int, Bar, Baz, int, Baz, int, int>;
+  static_assert(!std::is_constructible<V, int>::value, "");
+  static_assert(!std::is_constructible<V, Baz>::value, "");
+  // OK, the selected type appears only once and so it shouldn't
+  // be affected by the duplicate types.
+  static_assert(std::is_constructible<V, Bar>::value, "");
+}
+
 int main(int, char**) {
   test_T_ctor_basic();
   test_T_ctor_noexcept();
   test_T_ctor_sfinae();
   test_no_narrowing_check_for_class_types();
+  test_construction_with_repeated_types();
   return 0;
 }

From 4066978cb7a493abe303f81d930b1de8ee639909 Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Sun, 14 Jul 2019 21:29:39 +0000
Subject: [PATCH 070/451] Improve compile time of variant.

In particular, improve the compile time of the overload set builder
that variant uses to determine which alternative to construct.

Instead of having the __overload type construct itself recursively,
this patch uses a flat construction for the overload set.

llvm-svn: 366033
---
 libcxx/include/variant                        |  67 +++++-----
 .../stress_test_variant_overloads_impl.sh.cpp | 118 ++++++++++++++++++
 2 files changed, 154 insertions(+), 31 deletions(-)
 create mode 100644 libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp

diff --git a/libcxx/include/variant b/libcxx/include/variant
index 88a625df71e1c..98a62c992fa12 100644
--- a/libcxx/include/variant
+++ b/libcxx/include/variant
@@ -1098,59 +1098,64 @@ struct __narrowing_check {
   template <class _Dest>
   static auto __test_impl(_Dest (&&)[1]) -> __identity<_Dest>;
   template <class _Dest, class _Source>
-  using _Apply = decltype(__test_impl<_Dest>({std::declval<_Source>()}));
+  using _Apply _LIBCPP_NODEBUG_TYPE = decltype(__test_impl<_Dest>({std::declval<_Source>()}));
 };
 
 template <class _Dest, class _Source>
-using __check_for_narrowing = typename _If<
+using __check_for_narrowing _LIBCPP_NODEBUG_TYPE =
+  typename _If<
 #ifdef _LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT
     false &&
 #endif
     is_arithmetic<_Dest>::value,
     __narrowing_check,
     __no_narrowing_check
-    >::template _Apply<_Dest, _Source>;
-
-
-template <class... _Types>
-struct __overload;
-
-template <>
-struct __overload<> { void operator()() const; };
-
-template <class _Tp, class... _Types>
-struct __overload<_Tp, _Types...> : __overload<_Types...> {
-  using __overload<_Types...>::operator();
+  >::template _Apply<_Dest, _Source>;
 
+template <class _Tp, size_t _Idx>
+struct __overload {
   template <class _Up>
   auto operator()(_Tp, _Up&&) const -> __check_for_narrowing<_Tp, _Up>;
 };
 
-template <class _Base, class _Tp>
-struct __overload_bool : _Base {
-  using _Base::operator();
-
+template <class _Tp, size_t>
+struct __overload_bool  {
   template <class _Up, class _Ap = __uncvref_t<_Up>>
   auto operator()(bool, _Up&&) const
       -> enable_if_t<is_same_v<_Ap, bool>, __identity<_Tp>>;
 };
 
-template <class... _Types>
-struct __overload<bool, _Types...>
-    : __overload_bool<__overload<_Types...>, bool> {};
-template <class... _Types>
-struct __overload<bool const, _Types...>
-    : __overload_bool<__overload<_Types...>, bool const> {};
-template <class... _Types>
-struct __overload<bool volatile, _Types...>
-    : __overload_bool<__overload<_Types...>, bool volatile> {};
-template <class... _Types>
-struct __overload<bool const volatile, _Types...>
-    : __overload_bool<__overload<_Types...>, bool const volatile> {};
+template <size_t _Idx>
+struct __overload<bool, _Idx> : __overload_bool<bool, _Idx> {};
+template <size_t _Idx>
+struct __overload<bool const, _Idx> : __overload_bool<bool const, _Idx> {};
+template <size_t _Idx>
+struct __overload<bool volatile, _Idx> : __overload_bool<bool volatile, _Idx> {};
+template <size_t _Idx>
+struct __overload<bool const volatile, _Idx> : __overload_bool<bool const volatile, _Idx> {};
+
+template <class ..._Bases>
+struct __all_overloads : _Bases... {
+  void operator()() const;
+  using _Bases::operator()...;
+};
+
+template <class IdxSeq>
+struct __make_overloads_imp;
+
+template <size_t ..._Idx>
+struct __make_overloads_imp<__tuple_indices<_Idx...> > {
+  template <class ..._Types>
+  using _Apply _LIBCPP_NODEBUG_TYPE = __all_overloads<__overload<_Types, _Idx>...>;
+};
+
+template <class ..._Types>
+using _MakeOverloads _LIBCPP_NODEBUG_TYPE = typename __make_overloads_imp<
+    __make_indices_imp<sizeof...(_Types), 0> >::template _Apply<_Types...>;
 
 template <class _Tp, class... _Types>
 using __best_match_t =
-    typename invoke_result_t<__overload<_Types...>, _Tp, _Tp>::type;
+    typename invoke_result_t<_MakeOverloads<_Types...>, _Tp, _Tp>::type;
 
 } // __variant_detail
 
diff --git a/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp b/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp
new file mode 100644
index 0000000000000..013d434f40d7b
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp
@@ -0,0 +1,118 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a dummy feature that prevents this test from running by default.
+// REQUIRES: template-cost-testing
+
+// Test the cost of the mechanism used to create an overload set used by variant
+// to determine which alternative to construct.
+
+// The table below compares the compile time and object size for each of the
+// variants listed in the RUN script.
+//
+//  Impl           Compile Time  Object Size
+// -----------------------------------------------------
+// flat:              959 ms        792 KiB
+// recursive:      23,444 ms     23,000 KiB
+// -----------------------------------------------------
+// variant_old:    16,894 ms     17,000 KiB
+// variant_new:     1,105 ms        828 KiB
+
+
+// RUN: %cxx %flags %compile_flags -std=c++17 -c %s \
+// RUN:    -ggdb  -ggnu-pubnames -ftemplate-depth=5000 -ftime-trace -g \
+// RUN:    -DTEST_NS=flat_impl -o %S/flat.o
+// RUN: %cxx %flags %compile_flags -std=c++17 -c %s \
+// RUN:    -ggdb  -ggnu-pubnames -ftemplate-depth=5000 -ftime-trace -g \
+// RUN:    -DTEST_NS=rec_impl -o %S/rec.o
+// RUN: %cxx %flags %compile_flags -std=c++17 -c %s \
+// RUN:    -ggdb  -ggnu-pubnames -ftemplate-depth=5000 -ftime-trace -g \
+// RUN:    -DTEST_NS=variant_impl -o %S/variant.o
+
+#include <type_traits>
+#include <tuple>
+#include <cassert>
+#include <variant>
+
+#include "test_macros.h"
+#include "template_cost_testing.h"
+
+template <size_t Idx>
+struct TestType {};
+
+template <class T>
+struct ID {
+  using type = T;
+};
+
+namespace flat_impl {
+
+struct OverloadBase { void operator()() const; };
+
+template <class Tp, size_t Idx>
+struct Overload {
+  auto operator()(Tp, Tp) const -> ID<Tp>;
+};
+
+template <class ...Bases>
+struct AllOverloads : OverloadBase, Bases... {};
+
+template <class IdxSeq>
+struct MakeOverloads;
+
+template <size_t ..._Idx>
+struct MakeOverloads<std::__tuple_indices<_Idx...> > {
+  template <class ...Types>
+  using Apply = AllOverloads<Overload<Types, _Idx>...>;
+};
+
+template <class ...Types>
+using Overloads = typename MakeOverloads<
+    std::__make_indices_imp<sizeof...(Types), 0> >::template Apply<Types...>;
+
+} // namespace flat_impl
+
+
+namespace rec_impl {
+
+template <class... Types> struct Overload;
+
+template <>
+struct Overload<> { void operator()() const; };
+
+template <class Tp, class... Types>
+struct Overload<Tp, Types...> : Overload<Types...> {
+  using Overload<Types...>::operator();
+  auto operator()(Tp, Tp) const -> ID<Tp>;
+};
+
+template <class... Types>
+using Overloads = Overload<Types...>;
+
+} // namespace rec_impl
+
+namespace variant_impl {
+  template <class ...Types>
+  using Overloads = std::__variant_detail::_MakeOverloads<Types...>;
+} // namespace variant_impl
+
+#ifndef TEST_NS
+#error TEST_NS must be defined
+#endif
+
+#define TEST_TYPE() TestType< __COUNTER__ >,
+using T1 = TEST_NS::Overloads<REPEAT_1000(TEST_TYPE) TestType<1>, TestType<1>, int>;
+static_assert(__COUNTER__ >= 1000, "");
+
+void fn1(T1 x) { DoNotOptimize(&x); }
+void fn2(typename std::invoke_result_t<T1, int, int>::type x) { DoNotOptimize(&x); }
+
+int main() {
+  DoNotOptimize(&fn1);
+  DoNotOptimize(&fn2);
+}

From 635d103e0be69342b65c251b7aa0e07bed418010 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 15 Jul 2019 02:02:31 +0000
Subject: [PATCH 071/451] [X86] Separate the memory size of
 vzext_load/vextract_store from the element size of the result type. Use them
 to improve the codegen of v2f32 loads/stores with sse1 only.

Summary:
SSE1 only supports v4f32, but it does have instructions like movlps/movhps that load/store 64 bits of memory.

This patch breaks the connection between the node VT of the vzext_load/vextract_store patterns and the memory VT, enabling a v4f32 node with a 64-bit memory VT. I've used i64 as the memory VT here. I've written the PatFrag predicate to just check the store size, not the specific VT. I think the VT will only matter for CSE purposes. We could use v2f32, but if we want to start using these operations in more places a simple integer type might make the most sense.

I'd like to maybe use this same thing for SSE2 and later as well, but that will need more work to be supported by EltsFromConsecutiveLoads to avoid regressing lit tests. I'd maybe also like to combine bitcasts with these load/store nodes now that the types are disconnected. And I'd also like to consider canonicalizing (scalar_to_vector + load) to vzext_load.

If you want I can split the mechanical tablegen stuff where I added the 32/64 off from the sse1 change.

Reviewers: spatel, RKSimon

Reviewed By: RKSimon

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64528

llvm-svn: 366034
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 73 +++++++++-----
 llvm/lib/Target/X86/X86InstrAVX512.td         | 96 +++++++++----------
 llvm/lib/Target/X86/X86InstrFragmentsSIMD.td  | 26 +++--
 llvm/lib/Target/X86/X86InstrSSE.td            | 88 ++++++++++-------
 .../CodeGen/X86/sse-intrinsics-fast-isel.ll   | 73 +++-----------
 llvm/test/CodeGen/X86/vector-shuffle-sse1.ll  | 19 +---
 6 files changed, 178 insertions(+), 197 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e0bcf70248948..8be441fe28e08 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -794,6 +794,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Custom);
+
+    setOperationAction(ISD::LOAD,               MVT::v2f32, Custom);
+    setOperationAction(ISD::STORE,              MVT::v2f32, Custom);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -971,11 +974,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // We want to legalize this to an f64 load rather than an i64 load on
     // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
     // store.
-    setOperationAction(ISD::LOAD,               MVT::v2f32, Custom);
     setOperationAction(ISD::LOAD,               MVT::v2i32, Custom);
     setOperationAction(ISD::LOAD,               MVT::v4i16, Custom);
     setOperationAction(ISD::LOAD,               MVT::v8i8,  Custom);
-    setOperationAction(ISD::STORE,              MVT::v2f32, Custom);
     setOperationAction(ISD::STORE,              MVT::v2i32, Custom);
     setOperationAction(ISD::STORE,              MVT::v4i16, Custom);
     setOperationAction(ISD::STORE,              MVT::v8i8,  Custom);
@@ -21267,21 +21268,29 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
         TargetLowering::TypeWidenVector)
     return SDValue();
 
-  // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
-  // and store it.
   MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
                                 StoreVT.getVectorNumElements() * 2);
   StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
                           DAG.getUNDEF(StoreVT));
-  MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
-  MVT CastVT = MVT::getVectorVT(StVT, 2);
-  StoredVal = DAG.getBitcast(CastVT, StoredVal);
-  StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
-                          DAG.getIntPtrConstant(0, dl));
 
-  return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
-                      St->getPointerInfo(), St->getAlignment(),
-                      St->getMemOperand()->getFlags());
+  if (Subtarget.hasSSE2()) {
+    // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
+    // and store it.
+    MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
+    MVT CastVT = MVT::getVectorVT(StVT, 2);
+    StoredVal = DAG.getBitcast(CastVT, StoredVal);
+    StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
+                            DAG.getIntPtrConstant(0, dl));
+
+    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+                        St->getPointerInfo(), St->getAlignment(),
+                        St->getMemOperand()->getFlags());
+  }
+  assert(Subtarget.hasSSE1() && "Expected SSE");
+  SDVTList Tys = DAG.getVTList(MVT::Other);
+  SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
+  return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64,
+                                 St->getMemOperand());
 }
 
 // Lower vector extended loads using a shuffle. If SSSE3 is not available we
@@ -28155,19 +28164,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     if (!ISD::isNON_EXTLoad(N))
       return;
     auto *Ld = cast<LoadSDNode>(N);
-    MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
-    SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
-                              Ld->getPointerInfo(),
-                              Ld->getAlignment(),
-                              Ld->getMemOperand()->getFlags());
-    SDValue Chain = Res.getValue(1);
-    MVT WideVT = MVT::getVectorVT(LdVT, 2);
-    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
-    MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
-                                  VT.getVectorNumElements() * 2);
-    Res = DAG.getBitcast(CastVT, Res);
+    if (Subtarget.hasSSE2()) {
+      MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
+      SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
+                                Ld->getPointerInfo(), Ld->getAlignment(),
+                                Ld->getMemOperand()->getFlags());
+      SDValue Chain = Res.getValue(1);
+      MVT WideVT = MVT::getVectorVT(LdVT, 2);
+      Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
+      MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
+                                    VT.getVectorNumElements() * 2);
+      Res = DAG.getBitcast(CastVT, Res);
+      Results.push_back(Res);
+      Results.push_back(Chain);
+      return;
+    }
+    assert(Subtarget.hasSSE1() && "Expected SSE");
+    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
+    SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
+    SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                          MVT::i64, Ld->getMemOperand());
     Results.push_back(Res);
-    Results.push_back(Chain);
+    Results.push_back(Res.getValue(1));
     return;
   }
   }
@@ -32016,8 +32034,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
     // directly if we don't shuffle the lower element and we shuffle the upper
     // (zero) elements within themselves.
     if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
-        (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
-      unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
+        (cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() %
+         MaskEltSizeInBits) == 0) {
+      unsigned Scale =
+          cast<MemIntrinsicSDNode>(V1)->getMemoryVT().getScalarSizeInBits() /
+          MaskEltSizeInBits;
       ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
       if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
           isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 56aa2ecffe071..54eddeacaa173 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1352,15 +1352,15 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
 
 let Predicates = [HasAVX512] in {
   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+  def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQZm addr:$src)>;
 }
 
 let Predicates = [HasVLX] in {
   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQZ128m addr:$src)>;
-  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQZ256m addr:$src)>;
 }
 let Predicates = [HasVLX, HasBWI] in {
@@ -3838,7 +3838,7 @@ def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
 
 let Predicates = [HasAVX512] in {
-  def : Pat<(X86vextractstore (v2i64 VR128X:$src), addr:$dst),
+  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
             (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
 }
 
@@ -3873,7 +3873,7 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
 // AVX-512  MOVSS, MOVSD
 //===----------------------------------------------------------------------===//
 
-multiclass avx512_move_scalar<string asm, SDNode OpNode,
+multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                               X86VectorVTInfo _> {
   let Predicates = [HasAVX512, OptForSize] in
   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
@@ -3901,7 +3901,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
   let canFoldAsLoad = 1, isReMaterializable = 1 in {
   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(set _.RC:$dst, (_.VT (X86vzload addr:$src)))],
+             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
   // _alt version uses FR32/FR64 register class.
   let isCodeGenOnly = 1 in
@@ -3935,10 +3935,10 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
               NotMemoryFoldable;
 }
 
-defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
+defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
 
-defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
+defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
 
@@ -4319,16 +4319,16 @@ let Predicates = [HasAVX512] in {
 
   // Represent the same patterns above but in the form they appear for
   // 256-bit types
-  def : Pat<(v8f32 (X86vzload addr:$src)),
+  def : Pat<(v8f32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzload addr:$src)),
+  def : Pat<(v4f64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
 
   // Represent the same patterns above but in the form they appear for
   // 512-bit types
-  def : Pat<(v16f32 (X86vzload addr:$src)),
+  def : Pat<(v16f32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
-  def : Pat<(v8f64 (X86vzload addr:$src)),
+  def : Pat<(v8f64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
 }
 
@@ -4351,21 +4351,21 @@ let Predicates = [HasAVX512] in {
   // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzload addr:$src)),
+  def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v8i32 (X86vzload addr:$src)),
+  def : Pat<(v8i32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
             (VMOVZPQILo2PQIZrr VR128X:$src)>;
-  def : Pat<(v2i64 (X86vzload addr:$src)),
+  def : Pat<(v2i64 (X86vzload64 addr:$src)),
             (VMOVQI2PQIZrm addr:$src)>;
-  def : Pat<(v4i64 (X86vzload addr:$src)),
+  def : Pat<(v4i64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
 
   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
-  def : Pat<(v16i32 (X86vzload addr:$src)),
+  def : Pat<(v16i32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
-  def : Pat<(v8i64 (X86vzload addr:$src)),
+  def : Pat<(v8i64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
 
   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
@@ -6353,11 +6353,11 @@ let Predicates = [HasAVX512] in {
   def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                     (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
-  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload addr:$src2))),
+  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
 
   // VMOVLPD patterns
-  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload addr:$src2))),
+  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
 }
 
@@ -8292,47 +8292,47 @@ let Predicates = [HasVLX] in {
 }
 
 let Predicates = [HasDQI, HasVLX] in {
-  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
             (VCVTPS2QQZ128rm addr:$src)>;
   def : Pat<(v2i64 (vselect VK2WM:$mask,
-                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                             VR128X:$src0)),
             (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
   def : Pat<(v2i64 (vselect VK2WM:$mask,
-                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                             v2i64x_info.ImmAllZerosV)),
             (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
 
-  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
             (VCVTPS2UQQZ128rm addr:$src)>;
   def : Pat<(v2i64 (vselect VK2WM:$mask,
-                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                             VR128X:$src0)),
             (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
   def : Pat<(v2i64 (vselect VK2WM:$mask,
-                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                             v2i64x_info.ImmAllZerosV)),
             (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
 
-  def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+  def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
             (VCVTTPS2QQZ128rm addr:$src)>;
   def : Pat<(v2i64 (vselect VK2WM:$mask,
-                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                             VR128X:$src0)),
             (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
   def : Pat<(v2i64 (vselect VK2WM:$mask,
-                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                             v2i64x_info.ImmAllZerosV)),
             (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
 
-  def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src))))),
+  def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
             (VCVTTPS2UQQZ128rm addr:$src)>;
   def : Pat<(v2i64 (vselect VK2WM:$mask,
-                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                             VR128X:$src0)),
             (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
   def : Pat<(v2i64 (vselect VK2WM:$mask,
-                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))),
+                            (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                             v2i64x_info.ImmAllZerosV)),
             (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
 }
@@ -8375,25 +8375,25 @@ def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
 }
 
 let Predicates = [HasVLX] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTDQ2PDZ128rm addr:$src)>;
   def : Pat<(v2f64 (vselect VK2WM:$mask,
-                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
+                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                             VR128X:$src0)),
             (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
   def : Pat<(v2f64 (vselect VK2WM:$mask,
-                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
+                            (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                             v2f64x_info.ImmAllZerosV)),
             (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
 
-  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTUDQ2PDZ128rm addr:$src)>;
   def : Pat<(v2f64 (vselect VK2WM:$mask,
-                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
+                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                             VR128X:$src0)),
             (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
   def : Pat<(v2f64 (vselect VK2WM:$mask,
-                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))),
+                            (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                             v2f64x_info.ImmAllZerosV)),
             (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
 }
@@ -8562,7 +8562,7 @@ let Predicates = [HasVLX] in {
                        EVEX_CD8<32, CD8VH>;
 
   // Pattern match vcvtph2ps of a scalar i64 load.
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTPH2PSZ128rm addr:$src)>;
   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
               (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
@@ -9626,13 +9626,13 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
-  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
   }
   let Predicates = [HasVLX] in {
   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
+  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
 
   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
@@ -9642,35 +9642,35 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
 
   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))),
+  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
 
   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
   }
   let Predicates = [HasVLX] in {
   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
 
   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))),
+  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
 
   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
   }
   // 512-bit patterns
@@ -10873,7 +10873,7 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))),
           (VMOVDDUPZ128rm addr:$src)>;
-def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
           (VMOVDDUPZ128rm addr:$src)>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 4077bdc92f86e..096cc27861caa 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -99,10 +99,10 @@ def X86insertps : SDNode<"X86ISD::INSERTPS",
 def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 
-def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
-                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86vextractstore  : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
-                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def X86vzld  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+                      [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86vextractst  : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
+                     [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 def SDTVtrunc    : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                         SDTCisInt<0>, SDTCisInt<1>,
@@ -939,10 +939,20 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
 def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
 def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
 
-def vzload_v4i32 : PatFrag<(ops node:$src),
-                           (bitconvert (v4i32 (X86vzload node:$src)))>;
-def vzload_v2i64 : PatFrag<(ops node:$src),
-                           (bitconvert (v2i64 (X86vzload node:$src)))>;
+def X86vzload32 : PatFrag<(ops node:$src),
+                          (X86vzld node:$src), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 4;
+}]>;
+
+def X86vzload64 : PatFrag<(ops node:$src),
+                          (X86vzld node:$src), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
+
+def X86vextractstore64 : PatFrag<(ops node:$val, node:$ptr),
+                                 (X86vextractst node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
+}]>;
 
 
 def fp32imm0 : PatLeaf<(f32 fpimm), [{
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index d25d216db1942..7d0a5b87baf49 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -226,14 +226,15 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
 
 // Loading from memory automatically zeroing upper bits.
 multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
-                         PatFrag mem_pat, string OpcodeStr, Domain d> {
+                         PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr,
+                         Domain d> {
   def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(set VR128:$dst, (vt (X86vzload addr:$src)))], d>,
+                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                      VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
   def NAME#rm   : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(set VR128:$dst, (vt (X86vzload addr:$src)))], d>,
+                     [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
                      Sched<[WriteFLoad]>;
 
   // _alt version uses FR32/FR64 register class.
@@ -255,9 +256,9 @@ defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                         SSEPackedDouble, "MOVSD", UseSSE2>, XD;
 
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, "movss",
+  defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
                              SSEPackedSingle>, XS;
-  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, "movsd",
+  defm MOVSD : sse12_move_rm<FR64, v2f64, f64mem, loadf64, X86vzload64, "movsd",
                              SSEPackedDouble>, XD;
 }
 
@@ -270,9 +271,9 @@ let Predicates = [UseAVX] in {
 
   // Represent the same patterns above but in the form they appear for
   // 256-bit types
-  def : Pat<(v8f32 (X86vzload addr:$src)),
+  def : Pat<(v8f32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
-  def : Pat<(v4f64 (X86vzload addr:$src)),
+  def : Pat<(v4f64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
 }
 
@@ -663,6 +664,13 @@ let Predicates = [UseSSE1] in {
   def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1,
                       (i8 -28)),
             (MOVLPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)),
+            (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(v4f32 (X86vzload64 addr:$src)),
+            (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>;
+  def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst),
+            (MOVLPSmr addr:$dst, VR128:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -702,7 +710,7 @@ let Predicates = [UseAVX] in {
   def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                       (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
             (VMOVHPDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload addr:$src2))),
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
             (VMOVHPDrm VR128:$src1, addr:$src2)>;
 
   def : Pat<(store (f64 (extractelt
@@ -711,7 +719,7 @@ let Predicates = [UseAVX] in {
             (VMOVHPDmr addr:$dst, VR128:$src)>;
 
   // MOVLPD patterns
-  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload addr:$src2))),
+  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
             (VMOVLPDrm VR128:$src1, addr:$src2)>;
 }
 
@@ -721,6 +729,12 @@ let Predicates = [UseSSE1] in {
   // No need for aligned load, we're only loading 64-bits.
   def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))),
             (MOVHPSrm VR128:$src1, addr:$src2)>;
+  def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))),
+            (MOVHPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)),
+                                addr:$dst),
+            (MOVHPSmr addr:$dst, VR128:$src)>;
 }
 
 let Predicates = [UseSSE2] in {
@@ -731,7 +745,7 @@ let Predicates = [UseSSE2] in {
   def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                       (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
             (MOVHPDrm VR128:$src1, addr:$src2)>;
-  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload addr:$src2))),
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))),
             (MOVHPDrm VR128:$src1, addr:$src2)>;
 
   def : Pat<(store (f64 (extractelt
@@ -740,7 +754,7 @@ let Predicates = [UseSSE2] in {
             (MOVHPDmr addr:$dst, VR128:$src)>;
 
   // MOVLPD patterns
-  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload addr:$src2))),
+  def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))),
             (MOVLPDrm VR128:$src1, addr:$src2)>;
 }
 
@@ -1631,13 +1645,13 @@ def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 
 // AVX register conversion intrinsics
 let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTDQ2PDrm addr:$src)>;
 } // Predicates = [HasAVX, NoVLX]
 
 // SSE2 register conversion intrinsics
 let Predicates = [UseSSE2] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
+  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (CVTDQ2PDrm addr:$src)>;
 } // Predicates = [UseSSE2]
 
@@ -4124,9 +4138,9 @@ let Predicates = [UseAVX] in {
   // These instructions also write zeros in the high part of a 256-bit register.
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (VMOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzload addr:$src)),
+  def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (VMOVDI2PDIrm addr:$src)>;
-  def : Pat<(v8i32 (X86vzload addr:$src)),
+  def : Pat<(v8i32 (X86vzload32 addr:$src)),
             (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>;
 }
 
@@ -4138,7 +4152,7 @@ let Predicates = [UseSSE2] in {
             (MOV64toPQIrr GR64:$src)>;
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (MOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzload addr:$src)),
+  def : Pat<(v4i32 (X86vzload32 addr:$src)),
             (MOVDI2PDIrm addr:$src)>;
 }
 
@@ -4206,19 +4220,19 @@ def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                 (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
 
 let Predicates = [UseAVX] in {
-  def : Pat<(v2i64 (X86vzload addr:$src)),
+  def : Pat<(v2i64 (X86vzload64 addr:$src)),
             (VMOVQI2PQIrm addr:$src)>;
-  def : Pat<(v4i64 (X86vzload addr:$src)),
+  def : Pat<(v4i64 (X86vzload64 addr:$src)),
             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
 
-  def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst),
+  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
             (VMOVPQI2QImr addr:$dst, VR128:$src)>;
 }
 
 let Predicates = [UseSSE2] in {
-  def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;
+  def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>;
 
-  def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst),
+  def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst),
             (MOVPQI2QImr addr:$dst, VR128:$src)>;
 }
 
@@ -4368,7 +4382,7 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
 }
 
@@ -4376,7 +4390,7 @@ let Predicates = [UseSSE3] in {
   // No need for aligned memory as this only loads 64-bits.
   def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))),
             (MOVDDUPrm addr:$src)>;
-  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+  def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))),
             (MOVDDUPrm addr:$src)>;
 }
 
@@ -4953,7 +4967,7 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
 
   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
             (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
 
   def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
@@ -4961,12 +4975,12 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy,
 
   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))),
             (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
 
   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))),
             (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
   }
 }
@@ -5018,7 +5032,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
-  def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
@@ -5026,7 +5040,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
   let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
-  def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v4i32 addr:$src)))),
+  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
             (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
@@ -5040,14 +5054,14 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
-  def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
 
   def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v4i32 addr:$src)))),
+  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
             (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
             (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
@@ -5056,7 +5070,7 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
@@ -7261,10 +7275,10 @@ let Predicates = [HasF16C, NoVLX] in {
                                WriteCvtPS2PHYSt>, VEX_L;
 
   // Pattern match vcvtph2ps of a scalar i64 load.
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
+  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
             (VCVTPH2PSrm addr:$src)>;
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
-              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
+  def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16
+              (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (VCVTPH2PSrm addr:$src)>;
 
   def : Pat<(store (f64 (extractelt
@@ -7436,9 +7450,9 @@ defm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
 
 let Predicates = [HasAVX2, NoVLX] in {
   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
-  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQrm addr:$src)>;
-  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+  def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
             (VPBROADCASTQYrm addr:$src)>;
 
   def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
@@ -7550,7 +7564,7 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVDDUPrr VR128:$src)>;
   def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
             (VMOVDDUPrm addr:$src)>;
-  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
             (VMOVDDUPrm addr:$src)>;
 }
 
diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index cc61875b916c7..9f51a94051330 100644
--- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -1319,14 +1319,8 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) {
 ; X86-SSE-LABEL: test_mm_loadh_pi:
 ; X86-SSE:       # %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08]
-; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04]
-; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    shufps $0, %xmm1, %xmm2 # encoding: [0x0f,0xc6,0xd1,0x00]
-; X86-SSE-NEXT:    # xmm2 = xmm2[0,0],xmm1[0,0]
-; X86-SSE-NEXT:    shufps $36, %xmm2, %xmm0 # encoding: [0x0f,0xc6,0xc2,0x24]
-; X86-SSE-NEXT:    # xmm0 = xmm0[0,1],xmm2[2,0]
+; X86-SSE-NEXT:    movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00]
+; X86-SSE-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_mm_loadh_pi:
@@ -1345,18 +1339,8 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) {
 ;
 ; X64-SSE-LABEL: test_mm_loadh_pi:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
-; X64-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8]
-; X64-SSE-NEXT:    shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20]
-; X64-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
-; X64-SSE-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8]
-; X64-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc]
-; X64-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
-; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64-SSE-NEXT:    movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
-; X64-SSE-NEXT:    # xmm0 = xmm0[0],xmm1[0]
+; X64-SSE-NEXT:    movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07]
+; X64-SSE-NEXT:    # xmm0 = xmm0[0,1],mem[0,1]
 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_mm_loadh_pi:
@@ -1381,15 +1365,8 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) {
 ; X86-SSE-LABEL: test_mm_loadl_pi:
 ; X86-SSE:       # %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    movss (%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x10]
-; X86-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    movss 4(%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x48,0x04]
-; X86-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    shufps $0, %xmm2, %xmm1 # encoding: [0x0f,0xc6,0xca,0x00]
-; X86-SSE-NEXT:    # xmm1 = xmm1[0,0],xmm2[0,0]
-; X86-SSE-NEXT:    shufps $226, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe2]
-; X86-SSE-NEXT:    # xmm1 = xmm1[2,0],xmm0[2,3]
-; X86-SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
+; X86-SSE-NEXT:    movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00]
+; X86-SSE-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_mm_loadl_pi:
@@ -1408,19 +1385,8 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) {
 ;
 ; X64-SSE-LABEL: test_mm_loadl_pi:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movq (%rdi), %rax # encoding: [0x48,0x8b,0x07]
-; X64-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8]
-; X64-SSE-NEXT:    shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20]
-; X64-SSE-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc]
-; X64-SSE-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8]
-; X64-SSE-NEXT:    # xmm1 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc]
-; X64-SSE-NEXT:    # xmm2 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca]
-; X64-SSE-NEXT:    # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X64-SSE-NEXT:    shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4]
-; X64-SSE-NEXT:    # xmm1 = xmm1[0,1],xmm0[2,3]
-; X64-SSE-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
+; X64-SSE-NEXT:    movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07]
+; X64-SSE-NEXT:    # xmm0 = mem[0,1],xmm0[2,3]
 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_mm_loadl_pi:
@@ -2818,13 +2784,7 @@ define void @test_mm_storeh_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind {
 ; X86-SSE-LABEL: test_mm_storeh_pi2:
 ; X86-SSE:       # %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    movaps %xmm0, %xmm1 # encoding: [0x0f,0x28,0xc8]
-; X86-SSE-NEXT:    movhlps %xmm0, %xmm1 # encoding: [0x0f,0x12,0xc8]
-; X86-SSE-NEXT:    # xmm1 = xmm0[1],xmm1[1]
-; X86-SSE-NEXT:    shufps $231, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0xe7]
-; X86-SSE-NEXT:    # xmm0 = xmm0[3,1,2,3]
-; X86-SSE-NEXT:    movss %xmm0, 4(%eax) # encoding: [0xf3,0x0f,0x11,0x40,0x04]
-; X86-SSE-NEXT:    movss %xmm1, (%eax) # encoding: [0xf3,0x0f,0x11,0x08]
+; X86-SSE-NEXT:    movhps %xmm0, (%eax) # encoding: [0x0f,0x17,0x00]
 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_mm_storeh_pi2:
@@ -2841,11 +2801,7 @@ define void @test_mm_storeh_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind {
 ;
 ; X64-SSE-LABEL: test_mm_storeh_pi2:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0]
-; X64-SSE-NEXT:    # xmm0 = xmm0[1,1]
-; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8]
-; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8]
-; X64-SSE-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
+; X64-SSE-NEXT:    movhps %xmm0, (%rdi) # encoding: [0x0f,0x17,0x07]
 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_mm_storeh_pi2:
@@ -2922,10 +2878,7 @@ define void @test_mm_storel_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind {
 ; X86-SSE-LABEL: test_mm_storel_pi2:
 ; X86-SSE:       # %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00]
-; X86-SSE-NEXT:    shufps $229, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0xe5]
-; X86-SSE-NEXT:    # xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT:    movss %xmm0, 4(%eax) # encoding: [0xf3,0x0f,0x11,0x40,0x04]
+; X86-SSE-NEXT:    movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00]
 ; X86-SSE-NEXT:    retl # encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_mm_storel_pi2:
@@ -2942,9 +2895,7 @@ define void @test_mm_storel_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind {
 ;
 ; X64-SSE-LABEL: test_mm_storel_pi2:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8]
-; X64-SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8]
-; X64-SSE-NEXT:    movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
+; X64-SSE-NEXT:    movlps %xmm0, (%rdi) # encoding: [0x0f,0x13,0x07]
 ; X64-SSE-NEXT:    retq # encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_mm_storel_pi2:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll
index eb0f0b043e24e..7a8c41519d654 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll
@@ -230,15 +230,7 @@ define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
 define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
 ; SSE1-LABEL: insert_mem_lo_v4f32:
 ; SSE1:       # %bb.0:
-; SSE1-NEXT:    movq (%rdi), %rax
-; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; SSE1-NEXT:    shrq $32, %rax
-; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
-; SSE1-NEXT:    movaps %xmm1, %xmm0
+; SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; SSE1-NEXT:    retq
   %a = load <2 x float>, <2 x float>* %ptr
   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -249,14 +241,7 @@ define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
 define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
 ; SSE1-LABEL: insert_mem_hi_v4f32:
 ; SSE1:       # %bb.0:
-; SSE1-NEXT:    movq (%rdi), %rax
-; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; SSE1-NEXT:    shrq $32, %rax
-; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
-; SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE1-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE1-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
 ; SSE1-NEXT:    retq
   %a = load <2 x float>, <2 x float>* %ptr
   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>

From 2d63fbb7b1f7bbbd91f9f862946663a813e1780c Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Mon, 15 Jul 2019 03:24:35 +0000
Subject: [PATCH 072/451] [ValueTracking] Look through constant Int2Ptr/Ptr2Int
 expressions

Summary:
This is analogous to the int2ptr/ptr2int instruction handling introduced
in D54956.

Reviewers: fhahn, efriedma, spatel, nlopes, sanjoy, lebedev.ri

Subscribers: hiraditya, bollu, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64708

llvm-svn: 366036
---
 llvm/lib/Analysis/ValueTracking.cpp           |  9 +++++++++
 llvm/test/Transforms/FunctionAttrs/nonnull.ll | 10 ++++++++++
 2 files changed, 19 insertions(+)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index ad8034b2d7bc5..49a328bbc9ba3 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1998,6 +1998,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
       // Must be non-zero due to null test above.
       return true;
 
+    if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+      // See the comment for IntToPtr/PtrToInt instructions below.
+      if (CE->getOpcode() == Instruction::IntToPtr ||
+          CE->getOpcode() == Instruction::PtrToInt)
+        if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <=
+            Q.DL.getTypeSizeInBits(CE->getType()))
+          return isKnownNonZero(CE->getOperand(0), Depth, Q);
+    }
+
     // For constant vectors, check that all elements are undefined or known
     // non-zero to determine that the whole vector is known non-zero.
     if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll
index 7029be9691d78..6c04e4907d8e8 100644
--- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll
@@ -237,4 +237,14 @@ define i32 addrspace(3)* @gep2(i32 addrspace(3)* %p) {
   ret i32 addrspace(3)* %q
 }
 
+; CHECK: define internal nonnull i32* @f2()
+define internal i32* @f2() {
+  ret i32* inttoptr (i64 4 to i32*)
+}
+
+define  i32* @f1() {
+ %c = call i32* @f2()
+  ret i32* %c
+}
+
 attributes #0 = { "null-pointer-is-valid"="true" }

From 776ac79e88ddd630f2abf346f685a7ef53529502 Mon Sep 17 00:00:00 2001
From: Kang Zhang <shkzhang@cn.ibm.com>
Date: Mon, 15 Jul 2019 03:55:10 +0000
Subject: [PATCH 073/451] [NFC][PowerPC] Add the test block-placement.mir

llvm-svn: 366037
---
 llvm/test/CodeGen/PowerPC/block-placement.mir | 217 ++++++++++++++++++
 1 file changed, 217 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/block-placement.mir

diff --git a/llvm/test/CodeGen/PowerPC/block-placement.mir b/llvm/test/CodeGen/PowerPC/block-placement.mir
new file mode 100644
index 0000000000000..3c6937cdc4a6d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/block-placement.mir
@@ -0,0 +1,217 @@
+# RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu \
+# RUN:   -run-pass=block-placement -o - %s | FileCheck %s
+--- |
+  ; ModuleID = 'block-placement.ll'
+  source_filename = "block-placement.ll"
+  target datalayout = "e-m:e-i64:64-n32:64"
+  target triple = "powerpc64le-unknown-linux-gnu"
+  
+  %"class.xercesc_2_7::HashXMLCh" = type { %"class.xercesc_2_7::HashBase" }
+  %"class.xercesc_2_7::HashBase" = type { i32 (...)** }
+  
+  define dso_local zeroext i1 @_ZN11xercesc_2_79HashXMLCh6equalsEPKvS2_(%"class.xercesc_2_7::HashXMLCh"* nocapture readnone %this, i8* readonly %key1, i8* readonly %key2) unnamed_addr #0 {
+  entry:
+    %cmp.i = icmp eq i8* %key1, null
+    %cmp1.i = icmp eq i8* %key2, null
+    %or.cond.i = or i1 %cmp.i, %cmp1.i
+    br i1 %or.cond.i, label %if.then.i, label %while.cond.preheader.i
+  
+  while.cond.preheader.i:                           ; preds = %entry
+    %0 = bitcast i8* %key2 to i16*
+    %1 = bitcast i8* %key1 to i16*
+    %2 = load i16, i16* %1, align 2
+    %3 = load i16, i16* %0, align 2
+    %cmp926.i = icmp eq i16 %2, %3
+    br i1 %cmp926.i, label %while.body.i.preheader, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit
+  
+  while.body.i.preheader:                           ; preds = %while.cond.preheader.i
+    %scevgep = getelementptr i8, i8* %key2, i64 2
+    %scevgep4 = getelementptr i8, i8* %key1, i64 2
+    br label %while.body.i
+  
+  if.then.i:                                        ; preds = %entry
+    br i1 %cmp.i, label %lor.lhs.false3.i, label %land.lhs.true.i
+  
+  land.lhs.true.i:                                  ; preds = %if.then.i
+    %4 = bitcast i8* %key1 to i16*
+    %5 = load i16, i16* %4, align 2
+    %tobool.i = icmp eq i16 %5, 0
+    br i1 %tobool.i, label %lor.lhs.false3.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit
+  
+  lor.lhs.false3.i:                                 ; preds = %land.lhs.true.i, %if.then.i
+    br i1 %cmp1.i, label %if.else.i, label %land.lhs.true5.i
+  
+  land.lhs.true5.i:                                 ; preds = %lor.lhs.false3.i
+    %6 = bitcast i8* %key2 to i16*
+    %7 = load i16, i16* %6, align 2
+    %tobool6.i = icmp eq i16 %7, 0
+    br i1 %tobool6.i, label %if.else.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit
+  
+  if.else.i:                                        ; preds = %land.lhs.true5.i, %lor.lhs.false3.i
+    br label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit
+  
+  while.body.i:                                     ; preds = %while.body.i.preheader, %if.end12.i
+    %lsr.iv5 = phi i8* [ %scevgep4, %while.body.i.preheader ], [ %scevgep6, %if.end12.i ]
+    %lsr.iv = phi i8* [ %scevgep, %while.body.i.preheader ], [ %scevgep2, %if.end12.i ]
+    %8 = phi i16 [ %15, %if.end12.i ], [ %2, %while.body.i.preheader ]
+    %9 = phi i8* [ %key1, %while.body.i.preheader ], [ %13, %if.end12.i ]
+    %10 = phi i8* [ %key2, %while.body.i.preheader ], [ %11, %if.end12.i ]
+    %11 = getelementptr i8, i8* %10, i64 2
+    %12 = bitcast i8* %11 to i16*
+    %13 = getelementptr i8, i8* %9, i64 2
+    %14 = bitcast i8* %13 to i16*
+    %tobool10.i = icmp eq i16 %8, 0
+    br i1 %tobool10.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit, label %if.end12.i
+  
+  if.end12.i:                                       ; preds = %while.body.i
+    %15 = load i16, i16* %14, align 2
+    %16 = load i16, i16* %12, align 2
+    %cmp9.i = icmp eq i16 %15, %16
+    %scevgep2 = getelementptr i8, i8* %lsr.iv, i64 2
+    %scevgep6 = getelementptr i8, i8* %lsr.iv5, i64 2
+    br i1 %cmp9.i, label %while.body.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit
+  
+  _ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit:    ; preds = %if.end12.i, %while.body.i, %if.else.i, %land.lhs.true5.i, %land.lhs.true.i, %while.cond.preheader.i
+    %retval.0.i1 = phi i64 [ 1, %if.else.i ], [ 0, %land.lhs.true.i ], [ 0, %land.lhs.true5.i ], [ 0, %while.cond.preheader.i ], [ 0, %if.end12.i ], [ 1, %while.body.i ]
+    %backToBool = trunc i64 %retval.0.i1 to i1
+    ret i1 %backToBool
+  }
+  
+  attributes #0 = { "target-cpu"="pwr9" }
+
+...
+---
+name:            _ZN11xercesc_2_79HashXMLCh6equalsEPKvS2_
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:
+  - { reg: '$x4', virtual-reg: '' }
+  - { reg: '$x5', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.5(0x40000000), %bb.1(0x40000000)
+    liveins: $x4, $x5
+  
+    renamable $cr1 = CMPDI renamable $x4, 0
+    renamable $cr0 = CMPDI renamable $x5, 0
+    renamable $cr5lt = CROR renamable $cr1eq, renamable $cr0eq
+    BC killed renamable $cr5lt, %bb.5
+  
+  bb.1.while.cond.preheader.i:
+    successors: %bb.2(0x40000000), %bb.11(0x40000000)
+    liveins: $x4, $x5
+  
+    renamable $r8 = LHZ 0, renamable $x4 :: (load 2 from %ir.1)
+    renamable $r6 = LHZ 0, renamable $x5 :: (load 2 from %ir.0)
+    renamable $x3 = LI8 0
+    renamable $cr0 = CMPLW renamable $r8, killed renamable $r6
+    BCC 68, killed renamable $cr0, %bb.11
+  
+  bb.2.while.body.i.preheader:
+    successors: %bb.3(0x80000000)
+    liveins: $r8, $x3, $x4, $x5
+  
+    renamable $x6 = ADDI8 renamable $x5, 2
+    renamable $x7 = ADDI8 renamable $x4, 2
+  
+  bb.3.while.body.i:
+    successors: %bb.4(0x04000000), %bb.10(0x7c000000)
+    liveins: $r8, $x3, $x4, $x5, $x6, $x7
+  
+    dead renamable $r8 = ANDIo killed renamable $r8, 65535, implicit-def $cr0
+    BCC 68, killed renamable $cr0, %bb.10
+  
+  bb.4:
+    renamable $x3 = LI8 1
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+  
+  bb.5.if.then.i:
+    successors: %bb.7(0x30000000), %bb.6(0x50000000)
+    liveins: $cr0, $cr1, $x4, $x5
+  
+    BC killed renamable $cr1eq, %bb.7
+  
+  bb.6.land.lhs.true.i:
+    successors: %bb.7(0x30000000), %bb.11(0x50000000)
+    liveins: $cr0, $x4, $x5
+  
+    renamable $r4 = LHZ 0, killed renamable $x4 :: (load 2 from %ir.4)
+    renamable $x3 = LI8 0
+    renamable $cr1 = CMPLWI killed renamable $r4, 0
+    BCC 68, killed renamable $cr1, %bb.11
+  
+  bb.7.lor.lhs.false3.i:
+    successors: %bb.9(0x30000000), %bb.8(0x50000000)
+    liveins: $cr0, $x5
+  
+    BC killed renamable $cr0eq, %bb.9
+  
+  bb.8.land.lhs.true5.i:
+    successors: %bb.9(0x80000000)
+    liveins: $x5
+  
+    renamable $r4 = LHZ 0, killed renamable $x5 :: (load 2 from %ir.6)
+    renamable $x3 = LI8 0
+    renamable $cr0 = CMPLWI killed renamable $r4, 0
+    BCCLR 68, killed renamable $cr0, implicit $lr, implicit $rm, implicit killed $x3
+  
+  bb.9.if.else.i:
+    renamable $x3 = LI8 1
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+  
+  bb.10.if.end12.i:
+    successors: %bb.3(0x7c000000), %bb.11(0x04000000)
+    liveins: $x3, $x4, $x5, $x6, $x7
+  
+    renamable $x5 = ADDI8 killed renamable $x5, 2
+    renamable $x4 = ADDI8 killed renamable $x4, 2
+    renamable $r8 = LHZ 0, renamable $x4 :: (load 2 from %ir.14)
+    renamable $r9 = LHZ 0, renamable $x5 :: (load 2 from %ir.12)
+    renamable $x6 = ADDI8 killed renamable $x6, 2
+    renamable $x7 = ADDI8 killed renamable $x7, 2
+    renamable $cr0 = CMPLW renamable $r8, killed renamable $r9
+    BCC 76, killed renamable $cr0, %bb.3
+  
+  bb.11._ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit:
+    liveins: $x3
+  
+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+
+  ; CHECK:      bb.5.if.else.i:
+  ; CHECK:      B %bb.11
+
+  ; CHECK:      bb.11:
+  ; CHECK:        renamable $x3 = LI8 1
+  ; CHECK-NEXT:   BLR8 implicit $lr8, implicit $rm, implicit killed $x3
+...

From c3805d761ea8be6f35212a29aec09f03f1be5612 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 15 Jul 2019 04:51:34 +0000
Subject: [PATCH 074/451] [BPF] add unit tests for
 preserve_{array,union,struct}_access_index intrinsics

This is a followup patch for https://reviews.llvm.org/D61810/new/,
which adds new intrinsics preserve_{array,union,struct}_access_index.

Currently, only BPF backend utilizes preserve_{array,union,struct}_access_index
intrinsics, so all tests are compiled with BPF target.

https://reviews.llvm.org/D61524 already added some tests for these
intrinsics, but some of them are pretty complex.
This patch added a few unit test cases focusing on individual intrinsic
functions.

Also made a few clarifications to the language reference for these intrinsics.

Differential Revision: https://reviews.llvm.org/D64606

llvm-svn: 366038
---
 llvm/docs/LangRef.rst                         | 12 ++-
 llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll | 80 +++++++++++++++++++
 .../test/CodeGen/BPF/CORE/intrinsic-struct.ll | 77 ++++++++++++++++++
 llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll | 76 ++++++++++++++++++
 4 files changed, 242 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll
 create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-struct.ll
 create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2ccc8bd591614..18f760d9b0500 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -17332,7 +17332,7 @@ Syntax:
 """""""
 ::
 
-      declare <type2>
+      declare <ret_type>
       @llvm.preserve.array.access.index.p0s_union.anons.p0a10s_union.anons(<type> base,
                                                                            i32 dim,
                                                                            i32 index)
@@ -17342,7 +17342,9 @@ Overview:
 
 The '``llvm.preserve.array.access.index``' intrinsic returns the getelementptr address
 based on array base ``base``, array dimension ``dim`` and the last access index ``index``
-into the array.
+into the array. The return type ``ret_type`` is a pointer type to the array element.
+The array ``dim`` and ``index`` are preserved, which is more robust than a
+getelementptr instruction, which may be subject to compiler transformation.
 
 Arguments:
 """"""""""
@@ -17375,6 +17377,8 @@ The '``llvm.preserve.union.access.index``' intrinsic carries the debuginfo field
 ``di_index`` and returns the ``base`` address.
 The ``llvm.preserve.access.index`` type of metadata is attached to this call instruction
 to provide union debuginfo type.
+The metadata is a ``DICompositeType`` representing the debuginfo version of ``type``.
+The return type ``type`` is the same as the ``base`` type.
 
 Arguments:
 """"""""""
@@ -17393,7 +17397,7 @@ Syntax:
 """""""
 ::
 
-      declare <type2>
+      declare <ret_type>
       @llvm.preserve.struct.access.index.p0i8.p0s_struct.anon.0s(<type> base,
                                                                  i32 gep_index,
                                                                  i32 di_index)
@@ -17405,6 +17409,8 @@ The '``llvm.preserve.struct.access.index``' intrinsic returns the getelementptr
 based on struct base ``base`` and IR struct member index ``gep_index``.
 The ``llvm.preserve.access.index`` type of metadata is attached to this call instruction
 to provide struct debuginfo type.
+The metadata is a ``DICompositeType`` representing the debuginfo version of ``type``.
+The return type ``ret_type`` is a pointer type to the structure member.
 
 Arguments:
 """"""""""
diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll
new file mode 100644
index 0000000000000..adbcb9fd75ad2
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s
+;
+; Source code:
+;   #define _(x) (__builtin_preserve_access_index(x))
+;   struct s { int a; int b; };
+;   int get_value(const void *addr);
+;   int test(struct s *arg) { return get_value(_(&arg[2].b)); }
+; Compiler flag to generate IR:
+;   clang -target bpf -S -O2 -g -emit-llvm test.c
+
+%struct.s = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local i32 @test(%struct.s* %arg) local_unnamed_addr #0 !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata %struct.s* %arg, metadata !17, metadata !DIExpression()), !dbg !18
+  %0 = tail call %struct.s* @llvm.preserve.array.access.index.p0s_struct.ss.p0s_struct.ss(%struct.s* %arg, i32 0, i32 2), !dbg !19
+  %1 = tail call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.ss(%struct.s* %0, i32 1, i32 1), !dbg !19, !llvm.preserve.access.index !12
+  %2 = bitcast i32* %1 to i8*, !dbg !19
+  %call = tail call i32 @get_value(i8* %2) #4, !dbg !20
+  ret i32 %call, !dbg !21
+}
+; CHECK-LABEL: test
+; CHECK:       [[RELOC:.Ltmp[0-9]+]]
+; CHECK:       r2 = 20
+; CHECK:       r1 += r2
+; CHECK:       call get_value
+; CHECK:       exit
+;
+; CHECK:      .section        .BTF.ext,"",@progbits
+; CHECK:      .long   12                      # OffsetReloc
+; CHECK-NEXT: .long   20                      # Offset reloc section string offset=20
+; CHECK-NEXT: .long   1
+; CHECK-NEXT: .long   [[RELOC]]
+; CHECK-NEXT: .long   2
+; CHECK-NEXT: .long   26
+
+declare dso_local i32 @get_value(i8*) local_unnamed_addr #1
+
+; Function Attrs: nounwind readnone
+declare %struct.s* @llvm.preserve.array.access.index.p0s_struct.ss.p0s_struct.ss(%struct.s*, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nounwind readnone
+declare i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.ss(%struct.s*, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #3
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind readnone speculatable }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0 (trunk 365789)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/core")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 9.0.0 (trunk 365789)"}
+!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)
+!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 2, size: 64, elements: !13)
+!13 = !{!14, !15}
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 2, baseType: !10, size: 32)
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !1, line: 2, baseType: !10, size: 32, offset: 32)
+!16 = !{!17}
+!17 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 4, type: !11)
+!18 = !DILocation(line: 0, scope: !7)
+!19 = !DILocation(line: 4, column: 44, scope: !7)
+!20 = !DILocation(line: 4, column: 34, scope: !7)
+!21 = !DILocation(line: 4, column: 27, scope: !7)
diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-struct.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-struct.ll
new file mode 100644
index 0000000000000..c07c16f522847
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-struct.ll
@@ -0,0 +1,77 @@
+; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s
+;
+; Source code:
+;   struct s { int a; int b; };
+;   #define _(x) (__builtin_preserve_access_index(x))
+;   int get_value(const void *addr);
+;   int test(struct s *arg) { return get_value(_(&arg->b)); }
+; Compiler flag to generate IR:
+;   clang -target bpf -S -O2 -g -emit-llvm test.c
+
+%struct.s = type { i32, i32 }
+
+; Function Attrs: nounwind
+define dso_local i32 @test(%struct.s* %arg) local_unnamed_addr #0 !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata %struct.s* %arg, metadata !17, metadata !DIExpression()), !dbg !18
+  %0 = tail call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.ss(%struct.s* %arg, i32 1, i32 1), !dbg !19, !llvm.preserve.access.index !12
+  %1 = bitcast i32* %0 to i8*, !dbg !19
+  %call = tail call i32 @get_value(i8* %1) #4, !dbg !20
+  ret i32 %call, !dbg !21
+}
+
+; CHECK-LABEL: test
+; CHECK:       [[RELOC:.Ltmp[0-9]+]]
+; CHECK:       r2 = 4
+; CHECK:       r1 += r2
+; CHECK:       call get_value
+; CHECK:       exit
+;
+; CHECK:      .section        .BTF.ext,"",@progbits
+; CHECK:      .long   12                      # OffsetReloc
+; CHECK-NEXT: .long   20                      # Offset reloc section string offset=20
+; CHECK-NEXT: .long   1
+; CHECK-NEXT: .long   [[RELOC]]
+; CHECK-NEXT: .long   2
+; CHECK-NEXT: .long   26
+
+declare dso_local i32 @get_value(i8*) local_unnamed_addr #1
+
+; Function Attrs: nounwind readnone
+declare i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.ss(%struct.s*, i32 immarg, i32 immarg) #2
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #3
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind readnone speculatable }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0 (trunk 365789)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/core")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 9.0.0 (trunk 365789)"}
+!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)
+!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 1, size: 64, elements: !13)
+!13 = !{!14, !15}
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 1, baseType: !10, size: 32)
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !1, line: 1, baseType: !10, size: 32, offset: 32)
+!16 = !{!17}
+!17 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 4, type: !11)
+!18 = !DILocation(line: 0, scope: !7)
+!19 = !DILocation(line: 4, column: 44, scope: !7)
+!20 = !DILocation(line: 4, column: 34, scope: !7)
+!21 = !DILocation(line: 4, column: 27, scope: !7)
diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll
new file mode 100644
index 0000000000000..19b459533be6d
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll
@@ -0,0 +1,76 @@
+; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s
+;
+; Source code:
+;   union u { int a; int b; };
+;   #define _(x) (__builtin_preserve_access_index(x))
+;   int get_value(const void *addr);
+;   int test(union u *arg) { return get_value(_(&arg->b)); }
+; Compiler flag to generate IR:
+;   clang -target bpf -S -O2 -g -emit-llvm test.c
+
+%union.u = type { i32 }
+
+; Function Attrs: nounwind
+define dso_local i32 @test(%union.u* %arg) local_unnamed_addr #0 !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata %union.u* %arg, metadata !17, metadata !DIExpression()), !dbg !18
+  %0 = tail call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %arg, i32 1), !dbg !19, !llvm.preserve.access.index !12
+  %1 = bitcast %union.u* %0 to i8*, !dbg !19
+  %call = tail call i32 @get_value(i8* %1) #4, !dbg !20
+  ret i32 %call, !dbg !21
+}
+; CHECK-LABEL: test
+; CHECK:       [[RELOC:.Ltmp[0-9]+]]
+; CHECK:       r2 = 0
+; CHECK:       r1 += r2
+; CHECK:       call get_value
+; CHECK:       exit
+
+; CHECK:      .section        .BTF.ext,"",@progbits
+; CHECK:      .long   12                      # OffsetReloc
+; CHECK-NEXT: .long   20                      # Offset reloc section string offset=20
+; CHECK-NEXT: .long   1
+; CHECK-NEXT: .long   [[RELOC]]
+; CHECK-NEXT: .long   2
+; CHECK-NEXT: .long   26
+
+declare dso_local i32 @get_value(i8*) local_unnamed_addr #1
+
+; Function Attrs: nounwind readnone
+declare %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u*, i32 immarg) #2
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #3
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind readnone speculatable }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0 (trunk 365789)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/core")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 9.0.0 (trunk 365789)"}
+!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)
+!12 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u", file: !1, line: 1, size: 32, elements: !13)
+!13 = !{!14, !15}
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 1, baseType: !10, size: 32)
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !1, line: 1, baseType: !10, size: 32)
+!16 = !{!17}
+!17 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 4, type: !11)
+!18 = !DILocation(line: 0, scope: !7)
+!19 = !DILocation(line: 4, column: 43, scope: !7)
+!20 = !DILocation(line: 4, column: 33, scope: !7)
+!21 = !DILocation(line: 4, column: 26, scope: !7)

From 335f955dc4942d6956e759b8c2686c52914c36b6 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Mon, 15 Jul 2019 05:02:32 +0000
Subject: [PATCH 075/451] [PowerPC] Support fp128 libcalls

On PowerPC, IEEE 754 quadruple-precision libcall names use "kf" instead of "tf".

In libgcc, libgcc/config/rs6000/float128-sed converts TF names to KF
names. This patch implements its 24 substitution rules.

Reviewed By: hfinkel

Differential Revision: https://reviews.llvm.org/D64282

llvm-svn: 366039
---
 llvm/lib/CodeGen/TargetLoweringBase.cpp     |  28 ++++
 llvm/test/CodeGen/PowerPC/fp128-libcalls.ll | 164 ++++++++++++++++++++
 2 files changed, 192 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/fp128-libcalls.ll

diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 1fd3d38255e51..7c135864766fe 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -123,6 +123,34 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
   for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
     setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
 
+  // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
+  if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
+    setLibcallName(RTLIB::ADD_F128, "__addkf3");
+    setLibcallName(RTLIB::SUB_F128, "__subkf3");
+    setLibcallName(RTLIB::MUL_F128, "__mulkf3");
+    setLibcallName(RTLIB::DIV_F128, "__divkf3");
+    setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
+    setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
+    setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
+    setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
+    setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
+    setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+    setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
+    setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+    setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
+    setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+    setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
+    setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+    setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
+    setLibcallName(RTLIB::UNE_F128, "__nekf2");
+    setLibcallName(RTLIB::OGE_F128, "__gekf2");
+    setLibcallName(RTLIB::OLT_F128, "__ltkf2");
+    setLibcallName(RTLIB::OLE_F128, "__lekf2");
+    setLibcallName(RTLIB::OGT_F128, "__gtkf2");
+    setLibcallName(RTLIB::UO_F128, "__unordkf2");
+    setLibcallName(RTLIB::O_F128, "__unordkf2");
+  }
+
   // A few names are different on particular architectures or environments.
   if (TT.isOSDarwin()) {
     // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
diff --git a/llvm/test/CodeGen/PowerPC/fp128-libcalls.ll b/llvm/test/CodeGen/PowerPC/fp128-libcalls.ll
new file mode 100644
index 0000000000000..9d875c854e320
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fp128-libcalls.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s -O2 -mtriple=powerpc-linux-musl | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=powerpc64-linux-musl | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=powerpc64le-linux-musl | FileCheck %s
+
+define fp128 @addkf3(fp128 %a, fp128 %b) {
+; CHECK-LABEL: addkf3:
+; CHECK: __addkf3
+  %1 = fadd fp128 %a, %b
+  ret fp128 %1
+}
+
+define fp128 @subkf3(fp128 %a, fp128 %b) {
+; CHECK-LABEL: subkf3:
+; CHECK: __subkf3
+  %1 = fsub fp128 %a, %b
+  ret fp128 %1
+}
+
+define fp128 @mulkf3(fp128 %a, fp128 %b) {
+; CHECK-LABEL: mulkf3:
+; CHECK: __mulkf3
+  %1 = fmul fp128 %a, %b
+  ret fp128 %1
+}
+
+define fp128 @divkf3(fp128 %a, fp128 %b) {
+; CHECK-LABEL: divkf3:
+; CHECK: __divkf3
+  %1 = fdiv fp128 %a, %b
+  ret fp128 %1
+}
+
+define fp128 @extendsfkf2(float %a) {
+; CHECK-LABEL: extendsfkf2:
+; CHECK: __extendsfkf2
+  %1 = fpext float %a to fp128
+  ret fp128 %1
+}
+
+define fp128 @extenddfkf2(double %a) {
+; CHECK-LABEL: extenddfkf2:
+; CHECK: __extenddfkf2
+  %1 = fpext double %a to fp128
+  ret fp128 %1
+}
+
+define float @trunckfsf2(fp128 %a) {
+; CHECK-LABEL: trunckfsf2:
+; CHECK: __trunckfsf2
+  %1 = fptrunc fp128 %a to float
+  ret float %1
+}
+
+define double @trunckfdf2(fp128 %a) {
+; CHECK-LABEL: trunckfdf2:
+; CHECK: __trunckfdf2
+  %1 = fptrunc fp128 %a to double
+  ret double %1
+}
+
+define i32 @fixkfsi(fp128 %a) {
+; CHECK-LABEL: fixkfsi:
+; CHECK: __fixkfsi
+  %1 = fptosi fp128 %a to i32
+  ret i32 %1
+}
+
+define i64 @fixkfdi(fp128 %a) {
+; CHECK-LABEL: fixkfdi:
+; CHECK: __fixkfdi
+  %1 = fptosi fp128 %a to i64
+  ret i64 %1
+}
+
+define i32 @fixunskfsi(fp128 %a) {
+; CHECK-LABEL: fixunskfsi:
+; CHECK: __fixunskfsi
+  %1 = fptoui fp128 %a to i32
+  ret i32 %1
+}
+
+define i64 @fixunskfdi(fp128 %a) {
+; CHECK-LABEL: fixunskfdi:
+; CHECK: __fixunskfdi
+  %1 = fptoui fp128 %a to i64
+  ret i64 %1
+}
+
+define fp128 @floatsikf(i32 %a) {
+; CHECK-LABEL: floatsikf:
+; CHECK: __floatsikf
+  %1 = sitofp i32 %a to fp128
+  ret fp128 %1
+}
+
+define fp128 @floatdikf(i64 %a) {
+; CHECK-LABEL: floatdikf:
+; CHECK: __floatdikf
+  %1 = sitofp i64 %a to fp128
+  ret fp128 %1
+}
+
+define fp128 @floatunsikf(i32 %a) {
+; CHECK-LABEL: floatunsikf:
+; CHECK: __floatunsikf
+  %1 = uitofp i32 %a to fp128
+  ret fp128 %1
+}
+
+define fp128 @floatundikf(i64 %a) {
+; CHECK-LABEL: floatundikf:
+; CHECK: __floatundikf
+  %1 = uitofp i64 %a to fp128
+  ret fp128 %1
+}
+
+define i1 @test_eqkf2(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test_eqkf2:
+; CHECK: __eqkf2
+  %1 = fcmp oeq fp128 %a, %b
+  ret i1 %1
+}
+
+define i1 @test_nekf2(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test_nekf2:
+; CHECK: __nekf2
+  %1 = fcmp une fp128 %a, %b
+  ret i1 %1
+}
+
+define i1 @test_gekf2(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test_gekf2:
+; CHECK: __gekf2
+  %1 = fcmp oge fp128 %a, %b
+  ret i1 %1
+}
+
+define i1 @test_ltkf2(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test_ltkf2:
+; CHECK: __ltkf2
+  %1 = fcmp olt fp128 %a, %b
+  ret i1 %1
+}
+
+define i1 @test_lekf2(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test_lekf2:
+; CHECK: __lekf2
+  %1 = fcmp ole fp128 %a, %b
+  ret i1 %1
+}
+
+define i1 @test_gtkf2(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test_gtkf2:
+; CHECK: __gtkf2
+  %1 = fcmp ogt fp128 %a, %b
+  ret i1 %1
+}
+
+define i1 @test_unordkf2(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test_unordkf2:
+; CHECK: __unordkf2
+  %1 = fcmp uno fp128 %a, %b
+  ret i1 %1
+}

From f1ee04c42a431d88cd66b884570ca7928cffd0cd Mon Sep 17 00:00:00 2001
From: Serguei Katkov <serguei.katkov@azul.com>
Date: Mon, 15 Jul 2019 05:51:10 +0000
Subject: [PATCH 076/451] [LoopInfo] Introduce getUniqueNonLatchExitBlocks
 utility function

Extract the code from LoopUnrollRuntime into a utility function to
re-use it in D63923.

Reviewers: reames, mkuper
Reviewed By: reames
Subscribers: fhahn, hiraditya, zzheng, dmgreen, llvm-commits
Differential Revision: https://reviews.llvm.org/D64548

llvm-svn: 366040
---
 llvm/include/llvm/Analysis/LoopInfo.h         |  8 ++++
 llvm/include/llvm/Analysis/LoopInfoImpl.h     | 32 ++++++++++---
 .../Transforms/Utils/LoopUnrollRuntime.cpp    | 20 +++-----
 llvm/unittests/Analysis/LoopInfoTest.cpp      | 46 +++++++++++++++++++
 4 files changed, 87 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 98b31295ada08..f8f0a0c339d58 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -274,6 +274,14 @@ template <class BlockT, class LoopT> class LoopBase {
   /// dedicated exits.
   void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
 
+  /// Return all unique successor blocks of this loop, ignoring successors of
+  /// the latch block. If an exit block reached from the latch also has a
+  /// non-latch predecessor in the loop, it is still added to ExitBlocks.
+  /// These are the blocks _outside of the current loop_ which are branched to.
+  /// This assumes that loop exits are in canonical form, i.e. all exits are
+  /// dedicated exits.
+  void getUniqueNonLatchExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
+
   /// If getUniqueExitBlocks would return exactly one block, return that block.
   /// Otherwise return null.
   BlockT *getUniqueExitBlock() const;
diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 6ff483770c4bc..c9f690dac65eb 100644
--- a/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -95,21 +95,25 @@ bool LoopBase<BlockT, LoopT>::hasDedicatedExits() const {
   return true;
 }
 
-template <class BlockT, class LoopT>
-void LoopBase<BlockT, LoopT>::getUniqueExitBlocks(
-    SmallVectorImpl<BlockT *> &ExitBlocks) const {
+// Helper function to get unique loop exits. Pred is a predicate selecting the
+// BasicBlocks in a loop which should be considered to find loop exits.
+template <class BlockT, class LoopT, typename PredicateT>
+void getUniqueExitBlocksHelper(const LoopT *L,
+                               SmallVectorImpl<BlockT *> &ExitBlocks,
+                               PredicateT Pred) {
   typedef GraphTraits<BlockT *> BlockTraits;
   typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits;
 
-  assert(hasDedicatedExits() &&
+  assert(L->hasDedicatedExits() &&
          "getUniqueExitBlocks assumes the loop has canonical form exits!");
 
   SmallVector<BlockT *, 32> SwitchExitBlocks;
-  for (BlockT *Block : this->blocks()) {
+  auto Filtered = make_filter_range(L->blocks(), Pred);
+  for (BlockT *Block : Filtered) {
     SwitchExitBlocks.clear();
     for (BlockT *Successor : children<BlockT *>(Block)) {
       // If block is inside the loop then it is not an exit block.
-      if (contains(Successor))
+      if (L->contains(Successor))
         continue;
 
       BlockT *FirstPred = *InvBlockTraits::child_begin(Successor);
@@ -140,6 +144,22 @@ void LoopBase<BlockT, LoopT>::getUniqueExitBlocks(
   }
 }
 
+template <class BlockT, class LoopT>
+void LoopBase<BlockT, LoopT>::getUniqueExitBlocks(
+    SmallVectorImpl<BlockT *> &ExitBlocks) const {
+  getUniqueExitBlocksHelper(this, ExitBlocks,
+                            [](const BlockT *BB) { return true; });
+}
+
+template <class BlockT, class LoopT>
+void LoopBase<BlockT, LoopT>::getUniqueNonLatchExitBlocks(
+    SmallVectorImpl<BlockT *> &ExitBlocks) const {
+  const BlockT *Latch = getLoopLatch();
+  assert(Latch && "Latch block must exists");
+  getUniqueExitBlocksHelper(this, ExitBlocks,
+                            [Latch](const BlockT *BB) { return BB != Latch; });
+}
+
 template <class BlockT, class LoopT>
 BlockT *LoopBase<BlockT, LoopT>::getUniqueExitBlock() const {
   SmallVector<BlockT *, 8> UniqueExitBlocks;
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index b39d78dde0013..d22fdb4d52dc1 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -424,10 +424,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
 
 /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
 /// is populated with all the loop exit blocks other than the LatchExit block.
-static bool
-canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
-                             BasicBlock *LatchExit, bool PreserveLCSSA,
-                             bool UseEpilogRemainder) {
+static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
+                                         bool PreserveLCSSA,
+                                         bool UseEpilogRemainder) {
 
   // We currently have some correctness constrains in unrolling a multi-exit
   // loop. Check for these below.
@@ -435,11 +434,6 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,
   // We rely on LCSSA form being preserved when the exit blocks are transformed.
   if (!PreserveLCSSA)
     return false;
-  SmallVector<BasicBlock *, 4> Exits;
-  L->getUniqueExitBlocks(Exits);
-  for (auto *BB : Exits)
-    if (BB != LatchExit)
-      OtherExits.push_back(BB);
 
   // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
   // UnrollRuntimeMultiExit is true. This will need updating the logic in
@@ -469,9 +463,8 @@ static bool canProfitablyUnrollMultiExitLoop(
     bool PreserveLCSSA, bool UseEpilogRemainder) {
 
 #if !defined(NDEBUG)
-  SmallVector<BasicBlock *, 8> OtherExitsDummyCheck;
-  assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit,
-                                      PreserveLCSSA, UseEpilogRemainder) &&
+  assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
+                                      UseEpilogRemainder) &&
          "Should be safe to unroll before checking profitability!");
 #endif
 
@@ -595,8 +588,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
 
   // These are exit blocks other than the target of the latch exiting block.
   SmallVector<BasicBlock *, 4> OtherExits;
+  L->getUniqueNonLatchExitBlocks(OtherExits);
   bool isMultiExitUnrollingEnabled =
-      canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
+      canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
                                    UseEpilogRemainder) &&
       canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                        UseEpilogRemainder);
diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp
index 005e1dc405b75..953a72aee8e36 100644
--- a/llvm/unittests/Analysis/LoopInfoTest.cpp
+++ b/llvm/unittests/Analysis/LoopInfoTest.cpp
@@ -1110,3 +1110,49 @@ TEST(LoopInfoTest, AuxiliaryIV) {
             L->isAuxiliaryInductionVariable(Instruction_mulopcode, SE));
       });
 }
+
+// Examine getUniqueExitBlocks/getUniqueNonLatchExitBlocks functions.
+TEST(LoopInfoTest, LoopUniqueExitBlocks) {
+  const char *ModuleStr =
+      "target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
+      "define void @foo(i32 %n, i1 %cond) {\n"
+      "entry:\n"
+      "  br label %for.cond\n"
+      "for.cond:\n"
+      "  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]\n"
+      "  %cmp = icmp slt i32 %i.0, %n\n"
+      "  br i1 %cond, label %for.inc, label %for.end1\n"
+      "for.inc:\n"
+      "  %inc = add nsw i32 %i.0, 1\n"
+      "  br i1 %cmp, label %for.cond, label %for.end2, !llvm.loop !0\n"
+      "for.end1:\n"
+      "  br label %for.end\n"
+      "for.end2:\n"
+      "  br label %for.end\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n"
+      "!0 = distinct !{!0, !1}\n"
+      "!1 = !{!\"llvm.loop.distribute.enable\", i1 true}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfo(*M, "foo", [&](Function &F, LoopInfo &LI) {
+    Function::iterator FI = F.begin();
+    // First basic block is entry - skip it.
+    BasicBlock *Header = &*(++FI);
+    assert(Header->getName() == "for.cond");
+    Loop *L = LI.getLoopFor(Header);
+
+    SmallVector<BasicBlock *, 2> Exits;
+    // This loop has 2 unique exits.
+    L->getUniqueExitBlocks(Exits);
+    EXPECT_TRUE(Exits.size() == 2);
+    // And one unique non latch exit.
+    Exits.clear();
+    L->getUniqueNonLatchExitBlocks(Exits);
+    EXPECT_TRUE(Exits.size() == 1);
+  });
+}

From 796ed134ccc3b4d1a27a0c30c17e43db1ced6a89 Mon Sep 17 00:00:00 2001
From: Bill Wendling <isanbard@gmail.com>
Date: Mon, 15 Jul 2019 06:35:28 +0000
Subject: [PATCH 077/451] Remove set but unused variable.

llvm-svn: 366041
---
 llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 9a93e84d80e85..a82047473370a 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -250,7 +250,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-  bool AllSGPRSpilledToVGPRs = false;
   const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
     && EnableSpillVGPRToAGPR;
 
@@ -262,8 +261,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
   // handled as SpilledToReg in regular PrologEpilogInserter.
   if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
       SpillVGPRToAGPR) {
-    AllSGPRSpilledToVGPRs = true;
-
     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
     // are spilled to VGPRs, in which case we can eliminate the stack usage.
     //
@@ -299,8 +296,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
           bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
           (void)Spilled;
           assert(Spilled && "failed to spill SGPR to VGPR when allocated");
-        } else
-          AllSGPRSpilledToVGPRs = false;
+        }
       }
     }
 

From 45c43e7d04d3582b6e48c909252e99342800e1c8 Mon Sep 17 00:00:00 2001
From: Serguei Katkov <serguei.katkov@azul.com>
Date: Mon, 15 Jul 2019 06:42:39 +0000
Subject: [PATCH 078/451] [LoopUtils] Extend the scope of
 getLoopEstimatedTripCount

With this patch the getLoopEstimatedTripCount function will
also accept loops that have more than one exit, provided that
all exits except the one from the latch block end in a call to deopt.

These side exits should not impact the estimated trip count.

Reviewers: reames, mkuper, danielcdh
Reviewed By: reames
Subscribers: fhahn, lebedev.ri, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D64553

llvm-svn: 366042
---
 llvm/lib/Transforms/Utils/LoopUtils.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 29ae77c385dc2..ec226e65f6502 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -621,20 +621,28 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
 }
 
 Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
-  // Only support loops with a unique exiting block, and a latch.
-  if (!L->getExitingBlock())
-    return None;
+  // Support loops with an exiting latch where all other exits only
+  // deoptimize.
 
   // Get the branch weights for the loop's backedge.
-  BranchInst *LatchBR =
-      dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
-  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Latch)
+    return None;
+  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+  if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch))
     return None;
 
   assert((LatchBR->getSuccessor(0) == L->getHeader() ||
           LatchBR->getSuccessor(1) == L->getHeader()) &&
          "At least one edge out of the latch must go to the header");
 
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getUniqueNonLatchExitBlocks(ExitBlocks);
+  if (any_of(ExitBlocks, [](const BasicBlock *EB) {
+        return !EB->getTerminatingDeoptimizeCall();
+      }))
+    return None;
+
   // To estimate the number of times the loop body was executed, we want to
   // know the number of times the backedge was taken, vs. the number of times
   // we exited the loop.

From 54869ec907f322e9fed8942636560c27f482caf1 Mon Sep 17 00:00:00 2001
From: Hideto Ueno <uenoku.tokotoko@gmail.com>
Date: Mon, 15 Jul 2019 06:49:04 +0000
Subject: [PATCH 079/451] [Attributor] Deduce "nonnull" attribute

Summary:
Porting nonnull attribute to attributor.

Reviewers: jdoerfert, sstefan1

Reviewed By: jdoerfert

Subscribers: xbolva00, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63604

llvm-svn: 366043
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  32 ++
 llvm/lib/Transforms/IPO/Attributor.cpp        | 284 ++++++++++++++++
 llvm/test/Transforms/FunctionAttrs/nonnull.ll | 321 ++++++++++++++----
 llvm/test/Transforms/FunctionAttrs/nosync.ll  |   2 +-
 4 files changed, 573 insertions(+), 66 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 9e8a680d82900..3f6a2b6a24e9d 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -263,6 +263,14 @@ struct Attributor {
       Function &F, InformationCache &InfoCache,
       DenseSet</* Attribute::AttrKind */ unsigned> *Whitelist = nullptr);
 
+  /// Check \p Pred on all function call sites.
+  ///
+  /// This method will evaluate \p Pred on call sites and return
+  /// true if \p Pred holds at every call site. However, this is only possible
+  /// if all call sites are known, i.e., the function has internal linkage.
+  bool checkForAllCallSites(Function &F, std::function<bool(CallSite)> &Pred,
+                            bool RequireAllCallSites);
+
 private:
   /// The set of all abstract attributes.
   ///{
@@ -708,6 +716,30 @@ struct AANoSync : public AbstractAttribute {
   virtual bool isKnownNoSync() const = 0;
 };
 
+/// An abstract interface for all nonnull attributes.
+struct AANonNull : public AbstractAttribute {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AANonNull(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(V, InfoCache) {}
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AANonNull(Value *AssociatedVal, Value &AnchoredValue,
+            InformationCache &InfoCache)
+      : AbstractAttribute(AssociatedVal, AnchoredValue, InfoCache) {}
+
+  /// Return true if we assume that the underlying value is nonnull.
+  virtual bool isAssumedNonNull() const = 0;
+
+  /// Return true if we know that underlying value is nonnull.
+  virtual bool isKnownNonNull() const = 0;
+
+  /// See AbstractAttribute::getAttrKind().
+  Attribute::AttrKind getAttrKind() const override { return ID; }
+
+  /// The identifier used by the Attributor for this class of attributes.
+  static constexpr Attribute::AttrKind ID = Attribute::NonNull;
+};
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 5a72865db9d0f..5d18e40b0b92c 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/InstIterator.h"
@@ -51,6 +52,10 @@ STATISTIC(NumFnArgumentReturned,
           "Number of function arguments marked returned");
 STATISTIC(NumFnNoSync, "Number of functions marked nosync");
 STATISTIC(NumFnNoFree, "Number of functions marked nofree");
+STATISTIC(NumFnReturnedNonNull,
+          "Number of function return values marked nonnull");
+STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull");
+STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull");
 
 // TODO: Determine a good default value.
 //
@@ -108,6 +113,21 @@ static void bookkeeping(AbstractAttribute::ManifestPosition MP,
   case Attribute::NoFree:
     NumFnNoFree++;
     break;
+  case Attribute::NonNull:
+    switch (MP) {
+    case AbstractAttribute::MP_RETURNED:
+      NumFnReturnedNonNull++;
+      break;
+    case AbstractAttribute::MP_ARGUMENT:
+      NumFnArgumentNonNull++;
+      break;
+    case AbstractAttribute::MP_CALL_SITE_ARGUMENT:
+      NumCSArgumentNonNull++;
+      break;
+    default:
+      break;
+    }
+    break;
   default:
     return;
   }
@@ -970,10 +990,252 @@ ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) {
   return ChangeStatus::UNCHANGED;
 }
 
+/// ------------------------ NonNull Argument Attribute ------------------------
+struct AANonNullImpl : AANonNull, BooleanState {
+
+  AANonNullImpl(Value &V, InformationCache &InfoCache)
+      : AANonNull(V, InfoCache) {}
+
+  AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue,
+                InformationCache &InfoCache)
+      : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {}
+
+  /// See AbstractAttribute::getState()
+  /// {
+  AbstractState &getState() override { return *this; }
+  const AbstractState &getState() const override { return *this; }
+  /// }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    return getAssumed() ? "nonnull" : "may-null";
+  }
+
+  /// See AANonNull::isAssumedNonNull().
+  bool isAssumedNonNull() const override { return getAssumed(); }
+
+  /// See AANonNull::isKnownNonNull().
+  bool isKnownNonNull() const override { return getKnown(); }
+
+  /// Generate a predicate that checks if a given value is assumed nonnull.
+  /// The generated function returns true if a value satisfies any of the
+  /// following conditions.
+  /// (i) A value is known nonZero(=nonnull).
+  /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is
+  /// true.
+  std::function<bool(Value &)> generatePredicate(Attributor &);
+};
+
+std::function<bool(Value &)> AANonNullImpl::generatePredicate(Attributor &A) {
+  // FIXME: The `AAReturnedValues` should provide the predicate with the
+  // `ReturnInst` vector as well such that we can use the control flow sensitive
+  // version of `isKnownNonZero`. This should fix `test11` in
+  // `test/Transforms/FunctionAttrs/nonnull.ll`
+
+  std::function<bool(Value &)> Pred = [&](Value &RV) -> bool {
+    if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout()))
+      return true;
+
+    auto *NonNullAA = A.getAAFor<AANonNull>(*this, RV);
+
+    ImmutableCallSite ICS(&RV);
+
+    if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) &&
+        (!ICS || !ICS.hasRetAttr(Attribute::NonNull)))
+      return false;
+
+    return true;
+  };
+
+  return Pred;
+}
+
+/// NonNull attribute for function return value.
+struct AANonNullReturned : AANonNullImpl {
+
+  AANonNullReturned(Function &F, InformationCache &InfoCache)
+      : AANonNullImpl(F, InfoCache) {}
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override { return MP_RETURNED; }
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    Function &F = getAnchorScope();
+
+    // Already nonnull.
+    if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
+                                       Attribute::NonNull))
+      indicateOptimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+};
+
+ChangeStatus AANonNullReturned::updateImpl(Attributor &A) {
+  Function &F = getAnchorScope();
+
+  auto *AARetVal = A.getAAFor<AAReturnedValues>(*this, F);
+  if (!AARetVal) {
+    indicatePessimisticFixpoint();
+    return ChangeStatus::CHANGED;
+  }
+
+  std::function<bool(Value &)> Pred = this->generatePredicate(A);
+  if (!AARetVal->checkForallReturnedValues(Pred)) {
+    indicatePessimisticFixpoint();
+    return ChangeStatus::CHANGED;
+  }
+  return ChangeStatus::UNCHANGED;
+}
+
+/// NonNull attribute for function argument.
+struct AANonNullArgument : AANonNullImpl {
+
+  AANonNullArgument(Argument &A, InformationCache &InfoCache)
+      : AANonNullImpl(A, InfoCache) {}
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; }
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    Argument *Arg = cast<Argument>(getAssociatedValue());
+    if (Arg->hasNonNullAttr())
+      indicateOptimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+};
+
+/// NonNull attribute for a call site argument.
+struct AANonNullCallSiteArgument : AANonNullImpl {
+
+  /// See AANonNullImpl::AANonNullImpl(...).
+  AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo,
+                            InformationCache &InfoCache)
+      : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache),
+        ArgNo(ArgNo) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    CallSite CS(&getAnchoredValue());
+    if (isKnownNonZero(getAssociatedValue(),
+                       getAnchorScope().getParent()->getDataLayout()) ||
+        CS.paramHasAttr(ArgNo, getAttrKind()))
+      indicateOptimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(Attributor &A).
+  ChangeStatus updateImpl(Attributor &A) override;
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override {
+    return MP_CALL_SITE_ARGUMENT;
+  }
+
+  /// Return the argument index of the associated value at the call site.
+  int getArgNo() const { return ArgNo; }
+
+private:
+  unsigned ArgNo;
+};
+ChangeStatus AANonNullArgument::updateImpl(Attributor &A) {
+  Function &F = getAnchorScope();
+  Argument &Arg = cast<Argument>(getAnchoredValue());
+
+  unsigned ArgNo = Arg.getArgNo();
+
+  // Callback function
+  std::function<bool(CallSite)> CallSiteCheck = [&](CallSite CS) {
+    assert(CS && "Sanity check: Call site was not initialized properly!");
+
+    auto *NonNullAA = A.getAAFor<AANonNull>(*this, *CS.getInstruction(), ArgNo);
+
+    // Check that NonNullAA is AANonNullCallSiteArgument.
+    if (NonNullAA) {
+      ImmutableCallSite ICS(&NonNullAA->getAnchoredValue());
+      if (ICS && CS.getInstruction() == ICS.getInstruction())
+        return NonNullAA->isAssumedNonNull();
+      return false;
+    }
+
+    if (CS.paramHasAttr(ArgNo, Attribute::NonNull))
+      return true;
+
+    Value *V = CS.getArgOperand(ArgNo);
+    if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout()))
+      return true;
+
+    return false;
+  };
+  if (!A.checkForAllCallSites(F, CallSiteCheck, true)) {
+    indicatePessimisticFixpoint();
+    return ChangeStatus::CHANGED;
+  }
+  return ChangeStatus::UNCHANGED;
+}
+
+ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) {
+  // NOTE: Never look at the argument of the callee in this method.
+  //       If we do this, "nonnull" is always deduced because of the assumption.
+
+  Value &V = *getAssociatedValue();
+
+  auto *NonNullAA = A.getAAFor<AANonNull>(*this, V);
+
+  if (!NonNullAA || !NonNullAA->isAssumedNonNull()) {
+    indicatePessimisticFixpoint();
+    return ChangeStatus::CHANGED;
+  }
+
+  return ChangeStatus::UNCHANGED;
+}
+
 /// ----------------------------------------------------------------------------
 ///                               Attributor
 /// ----------------------------------------------------------------------------
 
+bool Attributor::checkForAllCallSites(Function &F,
+                                      std::function<bool(CallSite)> &Pred,
+                                      bool RequireAllCallSites) {
+  // Information can only be derived from the call sites if all of them are
+  // known, which requires the function to have internal linkage. Bail out
+  // early if the caller requires all call sites and that is not the case.
+  if (RequireAllCallSites && !F.hasInternalLinkage()) {
+    LLVM_DEBUG(
+        dbgs()
+        << "Attributor: Function " << F.getName()
+        << " has no internal linkage, hence not all call sites are known\n");
+    return false;
+  }
+
+  for (const Use &U : F.uses()) {
+    // A user that is not a call yields an invalid CallSite, so CS must not be
+    // dereferenced before the validity check below.
+    CallSite CS(U.getUser());
+    if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) {
+      if (!RequireAllCallSites)
+        continue;
+
+      LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser()
+                        << " is an invalid use of " << F.getName() << "\n");
+      return false;
+    }
+
+    if (Pred(CS))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for "
+                      << *CS.getInstruction() << "\n");
+    return false;
+  }
+
+  return true;
+}
+
 ChangeStatus Attributor::run() {
   // Initialize all abstract attributes.
   for (AbstractAttribute *AA : AllAbstractAttributes)
@@ -1128,6 +1390,17 @@ void Attributor::identifyDefaultAbstractAttributes(
     // though it is an argument attribute.
     if (!Whitelist || Whitelist->count(AAReturnedValues::ID))
       registerAA(*new AAReturnedValuesImpl(F, InfoCache));
+
+    // Every function with pointer return type might be marked nonnull.
+    if (ReturnType->isPointerTy() &&
+        (!Whitelist || Whitelist->count(AANonNullReturned::ID)))
+      registerAA(*new AANonNullReturned(F, InfoCache));
+  }
+
+  // Every argument with pointer type might be marked nonnull.
+  for (Argument &Arg : F.args()) {
+    if (Arg.getType()->isPointerTy())
+      registerAA(*new AANonNullArgument(Arg, InfoCache));
   }
 
   // Walk all instructions to find more attribute opportunities and also
@@ -1163,6 +1436,17 @@ void Attributor::identifyDefaultAbstractAttributes(
       InstOpcodeMap[I.getOpcode()].push_back(&I);
     if (I.mayReadOrWriteMemory())
       ReadOrWriteInsts.push_back(&I);
+
+    CallSite CS(&I);
+    if (CS && CS.getCalledFunction()) {
+      for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) {
+        if (!CS.getArgument(i)->getType()->isPointerTy())
+          continue;
+
+        // Call site argument attribute "non-null".
+        registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i);
+      }
+    }
   }
 }
 
diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll
index 6c04e4907d8e8..c37defc205aaa 100644
--- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll
@@ -1,31 +1,34 @@
-; RUN: opt -S -functionattrs -enable-nonnull-arg-prop %s | FileCheck %s
-; RUN: opt -S -passes=function-attrs -enable-nonnull-arg-prop %s | FileCheck %s
+; RUN: opt -S -functionattrs -enable-nonnull-arg-prop %s | FileCheck %s --check-prefixes=BOTH,FNATTR
+; RUN: opt -S -passes=function-attrs -enable-nonnull-arg-prop %s | FileCheck %s --check-prefixes=BOTH,FNATTR
+; RUN: opt -attributor --attributor-disable=false -S < %s | FileCheck %s --check-prefixes=BOTH,ATTRIBUTOR
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 declare nonnull i8* @ret_nonnull()
 
 ; Return a pointer trivially nonnull (call return attribute)
 define i8* @test1() {
-; CHECK: define nonnull i8* @test1
+; BOTH: define nonnull i8* @test1
   %ret = call i8* @ret_nonnull()
   ret i8* %ret
 }
 
 ; Return a pointer trivially nonnull (argument attribute)
 define i8* @test2(i8* nonnull %p) {
-; CHECK: define nonnull i8* @test2
+; BOTH: define nonnull i8* @test2
   ret i8* %p
 }
 
 ; Given an SCC where one of the functions can not be marked nonnull,
 ; can we still mark the other one which is trivially nonnull
 define i8* @scc_binder() {
-; CHECK: define i8* @scc_binder
+; BOTH: define i8* @scc_binder
   call i8* @test3()
   ret i8* null
 }
 
 define i8* @test3() {
-; CHECK: define nonnull i8* @test3
+; BOTH: define nonnull i8* @test3
   call i8* @scc_binder()
   %ret = call i8* @ret_nonnull()
   ret i8* %ret
@@ -35,13 +38,15 @@ define i8* @test3() {
 ; nonnull if neither can ever return null.  (In this case, they
 ; just never return period.)
 define i8* @test4_helper() {
-; CHECK: define noalias nonnull i8* @test4_helper
+; FNATTR: define noalias nonnull i8* @test4_helper
+; ATTRIBUTOR: define nonnull i8* @test4_helper
   %ret = call i8* @test4()
   ret i8* %ret
 }
 
 define i8* @test4() {
-; CHECK: define noalias nonnull i8* @test4
+; FNATTR: define noalias nonnull i8* @test4
+; ATTRIBUTOR: define nonnull i8* @test4
   %ret = call i8* @test4_helper()
   ret i8* %ret
 }
@@ -49,13 +54,15 @@ define i8* @test4() {
 ; Given a mutual recursive set of functions which *can* return null
 ; make sure we haven't marked them as nonnull.
 define i8* @test5_helper() {
-; CHECK: define noalias i8* @test5_helper
+; FNATTR: define noalias i8* @test5_helper
+; ATTRIBUTOR: define i8* @test5_helper
   %ret = call i8* @test5()
   ret i8* null
 }
 
 define i8* @test5() {
-; CHECK: define noalias i8* @test5
+; FNATTR: define noalias i8* @test5
+; ATTRIBUTOR: define i8* @test5
   %ret = call i8* @test5_helper()
   ret i8* %ret
 }
@@ -63,7 +70,7 @@ define i8* @test5() {
 ; Local analysis, but going through a self recursive phi
 define i8* @test6() {
 entry:
-; CHECK: define nonnull i8* @test6
+; BOTH: define nonnull i8* @test6
   %ret = call i8* @ret_nonnull()
   br label %loop
 loop:
@@ -73,6 +80,144 @@ exit:
   ret i8* %phi
 }
 
+; BOTH: define i8* @test7
+define i8* @test7(i8* %a) {
+  %b = getelementptr inbounds i8, i8* %a, i64 0
+  ret i8* %b
+}
+
+; BOTH: define nonnull i8* @test8
+define i8* @test8(i8* %a) {
+  %b = getelementptr inbounds i8, i8* %a, i64 1
+  ret i8* %b
+}
+
+; BOTH: define i8* @test9
+define i8* @test9(i8* %a, i64 %n) {
+  %b = getelementptr inbounds i8, i8* %a, i64 %n
+  ret i8* %b
+}
+
+declare void @llvm.assume(i1)
+; FNATTR: define i8* @test10
+; FIXME: missing nonnull
+; ATTRIBUTOR: define i8* @test10
+define i8* @test10(i8* %a, i64 %n) {
+  %cmp = icmp ne i64 %n, 0
+  call void @llvm.assume(i1 %cmp)
+  %b = getelementptr inbounds i8, i8* %a, i64 %n
+  ret i8* %b
+}
+
+; TEST 11
+; char* test11(char *p) {
+;   return p? p: nonnull();
+; }
+; FNATTR: define i8* @test11
+; FIXME: missing nonnull
+; ATTRIBUTOR: define i8* @test11
+define i8* @test11(i8*) local_unnamed_addr {
+  %2 = icmp eq i8* %0, null
+  br i1 %2, label %3, label %5
+
+; <label>:3:                                      ; preds = %1
+  %4 = tail call i8* @ret_nonnull()
+  br label %5
+
+; <label>:5:                                      ; preds = %3, %1
+  %6 = phi i8* [ %4, %3 ], [ %0, %1 ]
+  ret i8* %6
+}
+
+; TEST 12
+; Simple CallSite Test
+declare void @test12_helper(i8*)
+define void @test12(i8* nonnull %a) {
+; ATTRIBUTOR: define void @test12(i8* nonnull %a)
+; ATTRIBUTOR-NEXT: tail call void @test12_helper(i8* nonnull %a)
+  tail call void @test12_helper(i8* %a)
+  ret void
+}
+
+; TEST 13
+; Simple Argument Tests
+declare i8* @unknown()
+define void @test13_helper() {
+  %nonnullptr = tail call i8* @ret_nonnull()
+  %maybenullptr = tail call i8* @unknown()
+  tail call void @test13(i8* %nonnullptr, i8* %nonnullptr, i8* %maybenullptr)
+  tail call void @test13(i8* %nonnullptr, i8* %maybenullptr, i8* %nonnullptr)
+  ret void
+}
+define internal void @test13(i8* %a, i8* %b, i8* %c) {
+; ATTRIBUTOR: define internal void @test13(i8* nonnull %a, i8* %b, i8* %c) 
+  ret void
+}
+
+declare nonnull i8* @nonnull()
+
+; TEST 14
+; Complex propagation
+; Argument of f1, f2, f3 can be marked with nonnull.
+
+; * Argument
+; 1. In f1:bb6, %arg can be marked with nonnull because of the comparison in bb1
+; 2. Because f2 is internal function, f2(i32* %arg) -> @f2(i32* nonnull %arg)
+; 3. In f1:bb4 %tmp5 is nonnull and f3 is internal function. 
+;    Then, f3(i32* %arg) -> @f3(i32* nonnull %arg)
+; 4. We get nonnull in whole f1 call sites so f1(i32* %arg) -> @f1(i32* nonnull %arg)
+
+
+define internal i32* @f1(i32* %arg) {
+; FIXME: missing nonnull It should be nonnull @f1(i32* nonnull %arg)
+; ATTRIBUTOR: define internal nonnull i32* @f1(i32* %arg)
+
+bb:
+  %tmp = icmp eq i32* %arg, null
+  br i1 %tmp, label %bb9, label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = load i32, i32* %arg, align 4
+  %tmp3 = icmp eq i32 %tmp2, 0
+  br i1 %tmp3, label %bb6, label %bb4
+
+bb4:                                              ; preds = %bb1
+  %tmp5 = getelementptr inbounds i32, i32* %arg, i64 1
+; ATTRIBUTOR: %tmp5b = tail call i32* @f3(i32* nonnull %tmp5)
+  %tmp5b = tail call i32* @f3(i32* %tmp5)
+  br label %bb9
+
+bb6:                                              ; preds = %bb1
+; FIXME: missing nonnull. It should be @f2(i32* nonnull %arg)
+; ATTRIBUTOR: %tmp7 = tail call i32* @f2(i32* %arg)
+  %tmp7 = tail call i32* @f2(i32* %arg)
+  ret i32* %tmp7
+
+bb9:                                              ; preds = %bb4, %bb
+  %tmp10 = phi i32* [ %tmp5, %bb4 ], [ inttoptr (i64 4 to i32*), %bb ]
+  ret i32* %tmp10
+}
+
+define internal i32* @f2(i32* %arg) {
+; FIXME: missing nonnull. It should be nonnull @f2(i32* nonnull %arg) 
+; ATTRIBUTOR: define internal nonnull i32* @f2(i32* %arg)
+bb:
+
+; FIXME: missing nonnull. It should be @f1(i32* nonnull %arg) 
+; ATTRIBUTOR:   %tmp = tail call i32* @f1(i32* %arg)
+  %tmp = tail call i32* @f1(i32* %arg)
+  ret i32* %tmp
+}
+
+define dso_local noalias i32* @f3(i32* %arg) {
+; FIXME: missing nonnull. It should be nonnull @f3(i32* nonnull %arg) 
+; ATTRIBUTOR: define dso_local noalias i32* @f3(i32* %arg)
+bb:
+; FIXME: missing nonnull. It should be @f1(i32* nonnull %arg) 
+; ATTRIBUTOR:   %tmp = call i32* @f1(i32* %arg)
+  %tmp = call i32* @f1(i32* %arg)
+  ret i32* null
+}
 ; Test propagation of nonnull callsite args back to caller.
 
 declare void @use1(i8* %x)
@@ -88,11 +233,11 @@ declare i8 @use1safecall(i8* %x) readonly nounwind ; readonly+nounwind guarantee
 ; Can't extend non-null to parent for any argument because the 2nd call is not guaranteed to execute.
 
 define void @parent1(i8* %a, i8* %b, i8* %c) {
-; CHECK-LABEL: @parent1(i8* %a, i8* %b, i8* %c)
-; CHECK-NEXT:    call void @use3(i8* %c, i8* %a, i8* %b)
-; CHECK-NEXT:    call void @use3nonnull(i8* %b, i8* %c, i8* %a)
-; CHECK-NEXT:    ret void
-;
+; BOTH-LABEL: @parent1(i8* %a, i8* %b, i8* %c)
+; BOTH-NEXT:    call void @use3(i8* %c, i8* %a, i8* %b)
+; FNATTR-NEXT:    call void @use3nonnull(i8* %b, i8* %c, i8* %a)
+; ATTRIBUTOR-NEXT:    call void @use3nonnull(i8* nonnull %b, i8* nonnull %c, i8* nonnull %a)
+; BOTH-NEXT:    ret void
   call void @use3(i8* %c, i8* %a, i8* %b)
   call void @use3nonnull(i8* %b, i8* %c, i8* %a)
   ret void
@@ -101,11 +246,20 @@ define void @parent1(i8* %a, i8* %b, i8* %c) {
 ; Extend non-null to parent for all arguments.
 
 define void @parent2(i8* %a, i8* %b, i8* %c) {
-; CHECK-LABEL: @parent2(i8* nonnull %a, i8* nonnull %b, i8* nonnull %c)
-; CHECK-NEXT:    call void @use3nonnull(i8* %b, i8* %c, i8* %a)
-; CHECK-NEXT:    call void @use3(i8* %c, i8* %a, i8* %b)
-; CHECK-NEXT:    ret void
-;
+; FNATTR-LABEL: @parent2(i8* nonnull %a, i8* nonnull %b, i8* nonnull %c)
+; FNATTR-NEXT:    call void @use3nonnull(i8* %b, i8* %c, i8* %a)
+; FNATTR-NEXT:    call void @use3(i8* %c, i8* %a, i8* %b)
+
+; FIXME: missing "nonnull", it should be
+; @parent2(i8* nonnull %a, i8* nonnull %b, i8* nonnull %c)
+;     call void @use3nonnull(i8* nonnull %b, i8* nonnull %c, i8* nonnull %a)
+;     call void @use3(i8* nonnull %c, i8* nonnull %a, i8* nonnull %b)
+
+; ATTRIBUTOR-LABEL: @parent2(i8* %a, i8* %b, i8* %c)
+; ATTRIBUTOR-NEXT:    call void @use3nonnull(i8* nonnull %b, i8* nonnull %c, i8* nonnull %a)
+; ATTRIBUTOR-NEXT:    call void @use3(i8* %c, i8* %a, i8* %b)
+
+; BOTH-NEXT:    ret void
   call void @use3nonnull(i8* %b, i8* %c, i8* %a)
   call void @use3(i8* %c, i8* %a, i8* %b)
   ret void
@@ -114,11 +268,20 @@ define void @parent2(i8* %a, i8* %b, i8* %c) {
 ; Extend non-null to parent for 1st argument.
 
 define void @parent3(i8* %a, i8* %b, i8* %c) {
-; CHECK-LABEL: @parent3(i8* nonnull %a, i8* %b, i8* %c)
-; CHECK-NEXT:    call void @use1nonnull(i8* %a)
-; CHECK-NEXT:    call void @use3(i8* %c, i8* %b, i8* %a)
-; CHECK-NEXT:    ret void
-;
+; FNATTR-LABEL: @parent3(i8* nonnull %a, i8* %b, i8* %c)
+; FNATTR-NEXT:    call void @use1nonnull(i8* %a)
+; FNATTR-NEXT:    call void @use3(i8* %c, i8* %b, i8* %a)
+
+; FIXME: missing "nonnull", it should be,
+; @parent3(i8* nonnull %a, i8* %b, i8* %c)
+;    call void @use1nonnull(i8* nonnull %a)
+;    call void @use3(i8* %c, i8* %b, i8* nonnull %a)
+; ATTRIBUTOR-LABEL: @parent3(i8* %a, i8* %b, i8* %c)
+; ATTRIBUTOR-NEXT:    call void @use1nonnull(i8* nonnull %a)
+; ATTRIBUTOR-NEXT:    call void @use3(i8* %c, i8* %b, i8* %a)
+
+; BOTH-NEXT:  ret void
+
   call void @use1nonnull(i8* %a)
   call void @use3(i8* %c, i8* %b, i8* %a)
   ret void
@@ -131,8 +294,20 @@ define void @parent4(i8* %a, i8* %b, i8* %c) {
 ; CHECK-NEXT:    call void @use2nonnull(i8* %c, i8* %b)
 ; CHECK-NEXT:    call void @use2(i8* %a, i8* %c)
 ; CHECK-NEXT:    call void @use1(i8* %b)
-; CHECK-NEXT:    ret void
-;
+
+; FIXME : missing "nonnull", it should be
+; @parent4(i8* %a, i8* nonnull %b, i8* nonnull %c)
+;   call void @use2nonnull(i8* nonnull %c, i8* nonnull %b)
+;   call void @use2(i8* %a, i8* nonnull %c)
+;   call void @use1(i8* nonnull %b)
+
+; ATTRIBUTOR-LABEL: @parent4(i8* %a, i8* %b, i8* %c)
+; ATTRIBUTOR-NEXT:    call void @use2nonnull(i8* nonnull %c, i8* nonnull %b)
+; ATTRIBUTOR-NEXT:    call void @use2(i8* %a, i8* %c)
+; ATTRIBUTOR-NEXT:    call void @use1(i8* %b)
+
+; BOTH: ret void
+
   call void @use2nonnull(i8* %c, i8* %b)
   call void @use2(i8* %a, i8* %c)
   call void @use1(i8* %b)
@@ -144,14 +319,15 @@ define void @parent4(i8* %a, i8* %b, i8* %c) {
 ; because it would incorrectly propagate the wrong information to its callers.
 
 define void @parent5(i8* %a, i1 %a_is_notnull) {
-; CHECK-LABEL: @parent5(i8* %a, i1 %a_is_notnull)
-; CHECK-NEXT:    br i1 %a_is_notnull, label %t, label %f
-; CHECK:       t:
-; CHECK-NEXT:    call void @use1nonnull(i8* %a)
-; CHECK-NEXT:    ret void
-; CHECK:       f:
-; CHECK-NEXT:    ret void
-;
+; BOTH: @parent5(i8* %a, i1 %a_is_notnull)
+; BOTH-NEXT:    br i1 %a_is_notnull, label %t, label %f
+; BOTH:       t:
+; FNATTR-NEXT:    call void @use1nonnull(i8* %a)
+; ATTRIBUTOR-NEXT:    call void @use1nonnull(i8* nonnull %a)
+; BOTH-NEXT:    ret void
+; BOTH:       f:
+; BOTH-NEXT:    ret void
+
   br i1 %a_is_notnull, label %t, label %f
 t:
   call void @use1nonnull(i8* %a)
@@ -164,11 +340,12 @@ f:
 ; The volatile load might trap, so there's no guarantee that we'll ever get to the call.
 
 define i8 @parent6(i8* %a, i8* %b) {
-; CHECK-LABEL: @parent6(i8* %a, i8* %b)
-; CHECK-NEXT:    [[C:%.*]] = load volatile i8, i8* %b
-; CHECK-NEXT:    call void @use1nonnull(i8* %a)
-; CHECK-NEXT:    ret i8 [[C]]
-;
+; BOTH-LABEL: @parent6(i8* %a, i8* %b)
+; BOTH-NEXT:    [[C:%.*]] = load volatile i8, i8* %b
+; FNATTR-NEXT:    call void @use1nonnull(i8* %a)
+; ATTRIBUTOR-NEXT:    call void @use1nonnull(i8* nonnull %a)
+; BOTH-NEXT:    ret i8 [[C]]
+
   %c = load volatile i8, i8* %b
   call void @use1nonnull(i8* %a)
   ret i8 %c
@@ -177,11 +354,22 @@ define i8 @parent6(i8* %a, i8* %b) {
 ; The nonnull callsite is guaranteed to execute, so the argument must be nonnull throughout the parent.
 
 define i8 @parent7(i8* %a) {
-; CHECK-LABEL: @parent7(i8* nonnull %a)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @use1safecall(i8* %a)
-; CHECK-NEXT:    call void @use1nonnull(i8* %a)
-; CHECK-NEXT:    ret i8 [[RET]]
-;
+; FNATTR-LABEL: @parent7(i8* nonnull %a)
+; FNATTR-NEXT:    [[RET:%.*]] = call i8 @use1safecall(i8* %a)
+; FNATTR-NEXT:    call void @use1nonnull(i8* %a)
+
+; FIXME : missing "nonnull", it should be
+; @parent7(i8* nonnull %a)
+;   [[RET:%.*]] = call i8 @use1safecall(i8* nonnull %a)
+;   call void @use1nonnull(i8* nonnull %a)
+;   ret i8 [[RET]]
+
+; ATTRIBUTOR-LABEL: @parent7(i8* %a)
+; ATTRIBUTOR-NEXT:    [[RET:%.*]] = call i8 @use1safecall(i8* %a)
+; ATTRIBUTOR-NEXT:    call void @use1nonnull(i8* nonnull %a)
+
+; BOTH-NEXT: ret i8 [[RET]]
+
   %ret = call i8 @use1safecall(i8* %a)
   call void @use1nonnull(i8* %a)
   ret i8 %ret
@@ -192,18 +380,21 @@ define i8 @parent7(i8* %a) {
 declare i32 @esfp(...)
 
 define i1 @parent8(i8* %a, i8* %bogus1, i8* %b) personality i8* bitcast (i32 (...)* @esfp to i8*){
-; CHECK-LABEL: @parent8(i8* nonnull %a, i8* nocapture readnone %bogus1, i8* nonnull %b)
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    invoke void @use2nonnull(i8* %a, i8* %b)
-; CHECK-NEXT:    to label %cont unwind label %exc
-; CHECK:       cont:
-; CHECK-NEXT:    [[NULL_CHECK:%.*]] = icmp eq i8* %b, null
-; CHECK-NEXT:    ret i1 [[NULL_CHECK]]
-; CHECK:       exc:
-; CHECK-NEXT:    [[LP:%.*]] = landingpad { i8*, i32 }
-; CHECK-NEXT:    filter [0 x i8*] zeroinitializer
-; CHECK-NEXT:    unreachable
-;
+; FNATTR-LABEL: @parent8(i8* nonnull %a, i8* nocapture readnone %bogus1, i8* nonnull %b)
+; FIXME : missing "nonnull", it should be @parent8(i8* nonnull %a, i8* %bogus1, i8* nonnull %b)
+; ATTRIBUTOR-LABEL: @parent8(i8* %a, i8* %bogus1, i8* %b)
+; BOTH-NEXT:  entry:
+; FNATTR-NEXT:    invoke void @use2nonnull(i8* %a, i8* %b)
+; ATTRIBUTOR-NEXT:    invoke void @use2nonnull(i8* nonnull %a, i8* nonnull %b)
+; BOTH-NEXT:    to label %cont unwind label %exc
+; BOTH:       cont:
+; BOTH-NEXT:    [[NULL_CHECK:%.*]] = icmp eq i8* %b, null
+; BOTH-NEXT:    ret i1 [[NULL_CHECK]]
+; BOTH:       exc:
+; BOTH-NEXT:    [[LP:%.*]] = landingpad { i8*, i32 }
+; BOTH-NEXT:    filter [0 x i8*] zeroinitializer
+; BOTH-NEXT:    unreachable
+
 entry:
   invoke void @use2nonnull(i8* %a, i8* %b)
   to label %cont unwind label %exc
@@ -218,7 +409,7 @@ exc:
   unreachable
 }
 
-; CHECK: define nonnull i32* @gep1(
+; BOTH: define nonnull i32* @gep1(
 define i32* @gep1(i32* %p) {
   %q = getelementptr inbounds i32, i32* %p, i32 1
   ret i32* %q
@@ -226,24 +417,24 @@ define i32* @gep1(i32* %p) {
 
 define i32* @gep1_no_null_opt(i32* %p) #0 {
 ; Should't be able to derive nonnull based on gep.
-; CHECK: define i32* @gep1_no_null_opt(
+; BOTH: define i32* @gep1_no_null_opt(
   %q = getelementptr inbounds i32, i32* %p, i32 1
   ret i32* %q
 }
 
-; CHECK: define i32 addrspace(3)* @gep2(
+; BOTH: define i32 addrspace(3)* @gep2(
 define i32 addrspace(3)* @gep2(i32 addrspace(3)* %p) {
   %q = getelementptr inbounds i32, i32 addrspace(3)* %p, i32 1
   ret i32 addrspace(3)* %q
 }
 
-; CHECK: define internal nonnull i32* @f2()
-define internal i32* @f2() {
+; BOTH: define internal nonnull i32* @g2()
+define internal i32* @g2() {
   ret i32* inttoptr (i64 4 to i32*)
 }
 
-define  i32* @f1() {
- %c = call i32* @f2()
+define  i32* @g1() {
+ %c = call i32* @g2()
   ret i32* %c
 }
 
diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll
index a47d791c6646b..0769a1744abd9 100644
--- a/llvm/test/Transforms/FunctionAttrs/nosync.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll
@@ -28,7 +28,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 ; FNATTR: Function Attrs: norecurse nounwind optsize readnone ssp uwtable
 ; FNATTR-NEXT: define nonnull i32* @foo(%struct.ST* readnone %s)
 ; ATTRIBUTOR: Function Attrs: nofree nosync nounwind optsize readnone ssp uwtable
-; ATTRIBUTOR-NEXT: define i32* @foo(%struct.ST* %s)
+; ATTRIBUTOR-NEXT: define nonnull i32* @foo(%struct.ST* %s)
 define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
 entry:
   %arrayidx = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13

From 6bd02a442c0e4cadd84cf7ac37df533e4160e765 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Mon, 15 Jul 2019 07:25:11 +0000
Subject: [PATCH 080/451] [PowerPC] Support -mabi=ieeelongdouble and
 -mabi=ibmlongdouble

gcc PowerPC supports 3 representations of long double:

* -mlong-double-64

  long double has the same representation as double but is mangled as `e`.
  In clang, this is the default on AIX, FreeBSD and Linux musl.

* -mlong-double-128

  2 possible 128-bit floating point representations:

  + -mabi=ibmlongdouble
    IBM extended double format. Mangled as `g`
    In clang, this is the default on Linux glibc.
  + -mabi=ieeelongdouble
    IEEE 754 quadruple-precision format. Mangled as `u9__ieee128` (`U10__float128` before gcc 8.2)
    This is currently unavailable.

This patch adds -mabi=ibmlongdouble and -mabi=ieeelongdouble, and thus
makes the IEEE 754 quadruple-precision long double available for
languages supported by clang.

Reviewed By: hfinkel

Differential Revision: https://reviews.llvm.org/D64283

llvm-svn: 366044
---
 clang/include/clang/Basic/LangOptions.def |  1 +
 clang/include/clang/Driver/CC1Options.td  |  2 ++
 clang/lib/Basic/Targets/PPC.cpp           |  4 +++-
 clang/lib/Driver/ToolChains/Clang.cpp     | 19 ++++++++++++++-----
 clang/lib/Frontend/CompilerInvocation.cpp |  1 +
 clang/test/CodeGen/ppc64-long-double.cpp  | 11 +++++++++++
 clang/test/Driver/ppc-abi.c               | 18 ++++++++++++++++++
 7 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 46012f73aa8b8..bbe3f7b77dbc6 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -172,6 +172,7 @@ VALUE_LANGOPT(MaxTypeAlign  , 32, 0,
               "default maximum alignment for types")
 VALUE_LANGOPT(AlignDouble            , 1, 0, "Controls if doubles should be aligned to 8 bytes (x86 only)")
 VALUE_LANGOPT(LongDoubleSize        , 32, 0, "width of long double")
+LANGOPT(PPCIEEELongDouble            , 1, 0, "use IEEE 754 quadruple-precision for long double")
 COMPATIBLE_VALUE_LANGOPT(PICLevel    , 2, 0, "__PIC__ level")
 COMPATIBLE_VALUE_LANGOPT(PIE         , 1, 0, "is pie")
 LANGOPT(ROPI                         , 1, 0, "Read-only position independence")
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index 8862a015827bd..1f6c000ecf6a1 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -298,6 +298,8 @@ def menable_unsafe_fp_math : Flag<["-"], "menable-unsafe-fp-math">,
            "precision">;
 def mreassociate : Flag<["-"], "mreassociate">,
   HelpText<"Allow reassociation transformations for floating-point instructions">;
+def mabi_EQ_ieeelongdouble : Flag<["-"], "mabi=ieeelongdouble">,
+  HelpText<"Use IEEE 754 quadruple-precision for long double">;
 def mfloat_abi : Separate<["-"], "mfloat-abi">,
   HelpText<"The float ABI to use">;
 def mtp : Separate<["-"], "mtp">,
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index bd0ae60038092..2a773d9992869 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -466,7 +466,9 @@ void PPCTargetInfo::adjust(LangOptions &Opts) {
     Opts.AltiVec = 1;
   TargetInfo::adjust(Opts);
   if (LongDoubleFormat != &llvm::APFloat::IEEEdouble())
-    LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble();
+    LongDoubleFormat = Opts.PPCIEEELongDouble
+                           ? &llvm::APFloat::IEEEquad()
+                           : &llvm::APFloat::PPCDoubleDouble();
 }
 
 ArrayRef<Builtin::Info> PPCTargetInfo::getTargetBuiltins() const {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index c84cb480f1a54..6a83e1a480a5d 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1803,12 +1803,21 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
       break;
     }
 
-  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ))
-    // The ppc64 linux abis are all "altivec" abis by default. Accept and ignore
-    // the option if given as we don't have backend support for any targets
-    // that don't use the altivec abi.
-    if (StringRef(A->getValue()) != "altivec")
+  bool IEEELongDouble = false;
+  for (const Arg *A : Args.filtered(options::OPT_mabi_EQ)) {
+    StringRef V = A->getValue();
+    if (V == "ieeelongdouble")
+      IEEELongDouble = true;
+    else if (V == "ibmlongdouble")
+      IEEELongDouble = false;
+    else if (V != "altivec")
+      // The ppc64 linux abis are all "altivec" abis by default. Accept and ignore
+      // the option if given as we don't have backend support for any targets
+      // that don't use the altivec abi.
       ABIName = A->getValue();
+  }
+  if (IEEELongDouble)
+    CmdArgs.push_back("-mabi=ieeelongdouble");
 
   ppc::FloatABI FloatABI =
       ppc::getPPCFloatABI(getToolChain().getDriver(), Args);
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 7a07d9955fe74..8a9844096f081 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2745,6 +2745,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   Opts.LongDoubleSize = Args.hasArg(OPT_mlong_double_128)
                             ? 128
                             : Args.hasArg(OPT_mlong_double_64) ? 64 : 0;
+  Opts.PPCIEEELongDouble = Args.hasArg(OPT_mabi_EQ_ieeelongdouble);
   Opts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags);
   Opts.ROPI = Args.hasArg(OPT_fropi);
   Opts.RWPI = Args.hasArg(OPT_frwpi);
diff --git a/clang/test/CodeGen/ppc64-long-double.cpp b/clang/test/CodeGen/ppc64-long-double.cpp
index d588d6b80e963..a35965f2bf684 100644
--- a/clang/test/CodeGen/ppc64-long-double.cpp
+++ b/clang/test/CodeGen/ppc64-long-double.cpp
@@ -3,6 +3,14 @@
 // RUN: %clang_cc1 -triple powerpc64-linux-gnu -emit-llvm -o - %s -mlong-double-64 | \
 // RUN:   FileCheck --check-prefix=FP64 %s
 
+// musl defaults to -mlong-double-64, so -mlong-double-128 is needed to make
+// -mabi=ieeelongdouble effective.
+// RUN: %clang_cc1 -triple powerpc64-linux-musl -emit-llvm -o - %s -mlong-double-128 \
+// RUN:   -mabi=ieeelongdouble | FileCheck --check-prefix=FP128 %s
+// RUN: %clang_cc1 -triple powerpc64-linux-gnu -emit-llvm -o - %s \
+// RUN:   -mabi=ieeelongdouble | FileCheck --check-prefix=FP128 %s
+
+// IBM extended double is the default.
 // RUN: %clang_cc1 -triple powerpc64-linux-gnu -emit-llvm -o - %s | \
 // RUN:   FileCheck --check-prefix=IBM128 %s
 // RUN: %clang_cc1 -triple powerpc64-linux-musl -emit-llvm -o - -mlong-double-128 %s | \
@@ -13,10 +21,13 @@ int size = sizeof(x);
 
 // FP64: @x = global double {{.*}}, align 8
 // FP64: @size = global i32 8
+// FP128: @x = global fp128 {{.*}}, align 16
+// FP128: @size = global i32 16
 // IBM128: @x = global ppc_fp128 {{.*}}, align 16
 // IBM128: @size = global i32 16
 
 long double foo(long double d) { return d; }
 
 // FP64: double @_Z3fooe(double %d)
+// FP128: fp128 @_Z3foou9__ieee128(fp128 %d)
 // IBM128: ppc_fp128 @_Z3foog(ppc_fp128 %d)
diff --git a/clang/test/Driver/ppc-abi.c b/clang/test/Driver/ppc-abi.c
index a82a01de2781e..fdcf45da2900d 100644
--- a/clang/test/Driver/ppc-abi.c
+++ b/clang/test/Driver/ppc-abi.c
@@ -66,4 +66,22 @@
 // CHECK-ELFv2-PIC: "-mrelocation-model" "pic" "-pic-level" "2"
 // CHECK-ELFv2-PIC: "-target-abi" "elfv2"
 
+// Check -mabi=ieeelongdouble is passed through but it does not change -target-abi.
+// RUN: %clang -target powerpc64le-linux-gnu %s -mabi=ieeelongdouble -mabi=elfv1 -### 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ELFv1-IEEE %s
+// RUN: %clang -target powerpc64le-linux-gnu %s -mabi=elfv1 -mabi=ieeelongdouble -### 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ELFv1-IEEE %s
+// RUN: %clang -target powerpc64le-linux-gnu %s -mabi=elfv2 -mabi=elfv1 -mabi=ibmlongdouble -mabi=ieeelongdouble -### 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ELFv1-IEEE %s
 
+// CHECK-ELFv1-IEEE: "-mabi=ieeelongdouble"
+// CHECK-ELFv1-IEEE: "-target-abi" "elfv1"
+
+// Check -mabi=ibmlongdouble is the default.
+// RUN: %clang -target powerpc64le-linux-gnu %s -### 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ELFv2-IBM128 %s
+// RUN: %clang -target powerpc64le-linux-gnu %s -mabi=ibmlongdouble -### 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ELFv2-IBM128 %s
+
+// CHECK-ELFv2-IBM128-NOT: "-mabi=ieeelongdouble"
+// CHECK-ELFv2-IBM128: "-target-abi" "elfv2"

From d02f17daed3129373241e5be1989e25680c09412 Mon Sep 17 00:00:00 2001
From: Johan Vikstrom <jvikstrom@google.com>
Date: Mon, 15 Jul 2019 07:41:12 +0000
Subject: [PATCH 081/451] [clangd] Added highlighting to enum constants.

Summary: VSCode does not have a scope for enum constants. So they were placed under "constant.other.enum" as that seems to be the most correct scope for enum constants. However, this makes theia color them blue (the same color it uses for keywords).

Reviewers: hokein, sammccall, ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64624

llvm-svn: 366045
---
 .../clangd/SemanticHighlighting.cpp            |  6 ++++++
 .../clangd/SemanticHighlighting.h              |  1 +
 .../clangd/test/semantic-highlighting.test     |  3 +++
 .../unittests/SemanticHighlightingTests.cpp    | 18 +++++++++++++-----
 4 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index 40070a5bbadd2..37f3d90ea4aeb 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -119,6 +119,10 @@ class HighlightingTokenCollector
       addToken(Loc, HighlightingKind::Enum);
       return;
     }
+    if (isa<EnumConstantDecl>(D)) {
+      addToken(Loc, HighlightingKind::EnumConstant);
+      return;
+    }
     if (isa<VarDecl>(D)) {
       addToken(Loc, HighlightingKind::Variable);
       return;
@@ -249,6 +253,8 @@ llvm::StringRef toTextMateScope(HighlightingKind Kind) {
     return "entity.name.type.class.cpp";
   case HighlightingKind::Enum:
     return "entity.name.type.enum.cpp";
+  case HighlightingKind::EnumConstant:
+    return "variable.other.enummember.cpp";
   case HighlightingKind::Namespace:
     return "entity.name.namespace.cpp";
   case HighlightingKind::NumKinds:
diff --git a/clang-tools-extra/clangd/SemanticHighlighting.h b/clang-tools-extra/clangd/SemanticHighlighting.h
index ba160e1c92999..e375f1be1c179 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.h
+++ b/clang-tools-extra/clangd/SemanticHighlighting.h
@@ -28,6 +28,7 @@ enum class HighlightingKind {
   Function,
   Class,
   Enum,
+  EnumConstant,
   Namespace,
 
   NumKinds,
diff --git a/clang-tools-extra/clangd/test/semantic-highlighting.test b/clang-tools-extra/clangd/test/semantic-highlighting.test
index 810dfe6207e58..99f48050bceb7 100644
--- a/clang-tools-extra/clangd/test/semantic-highlighting.test
+++ b/clang-tools-extra/clangd/test/semantic-highlighting.test
@@ -17,6 +17,9 @@
 # CHECK-NEXT:            "entity.name.type.enum.cpp"
 # CHECK-NEXT:          ],
 # CHECK-NEXT:          [
+# CHECK-NEXT:            "variable.other.enummember.cpp"
+# CHECK-NEXT:          ],
+# CHECK-NEXT:          [
 # CHECK-NEXT:            "entity.name.namespace.cpp"
 # CHECK-NEXT:          ]
 # CHECK-NEXT:        ]
diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
index f45c11566836e..9ada83864108e 100644
--- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -37,7 +37,8 @@ void checkHighlightings(llvm::StringRef Code) {
       {HighlightingKind::Function, "Function"},
       {HighlightingKind::Class, "Class"},
       {HighlightingKind::Enum, "Enum"},
-      {HighlightingKind::Namespace, "Namespace"}};
+      {HighlightingKind::Namespace, "Namespace"},
+      {HighlightingKind::EnumConstant, "EnumConstant"}};
   std::vector<HighlightingToken> ExpectedTokens;
   for (const auto &KindString : KindToString) {
     std::vector<HighlightingToken> Toks = makeHighlightingTokens(
@@ -103,12 +104,19 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
       }
     )cpp",
     R"cpp(
-      enum class $Enum[[E]] {};
-      enum $Enum[[EE]] {};
+      enum class $Enum[[E]] {
+        $EnumConstant[[A]],
+        $EnumConstant[[B]],
+      };
+      enum $Enum[[EE]] {
+        $EnumConstant[[Hi]],
+      };
       struct $Class[[A]] {
         $Enum[[E]] EEE;
         $Enum[[EE]] EEEE;
       };
+      int $Variable[[I]] = $EnumConstant[[Hi]];
+      $Enum[[E]] $Variable[[L]] = $Enum[[E]]::$EnumConstant[[B]];
     )cpp",
     R"cpp(
       namespace $Namespace[[abc]] {
@@ -118,7 +126,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
           namespace $Namespace[[cde]] {
             struct $Class[[A]] {
               enum class $Enum[[B]] {
-                Hi,
+                $EnumConstant[[Hi]],
               };
             };
           }
@@ -129,7 +137,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
             $Namespace[[abc]]::$Namespace[[bcd]]::$Namespace[[cde]];
       $Namespace[[abc]]::$Namespace[[bcd]]::$Class[[A]] $Variable[[AA]];
       $Namespace[[vwz]]::$Class[[A]]::$Enum[[B]] $Variable[[AAA]] =
-            $Namespace[[vwz]]::$Class[[A]]::$Enum[[B]]::Hi;
+            $Namespace[[vwz]]::$Class[[A]]::$Enum[[B]]::$EnumConstant[[Hi]];
       ::$Namespace[[vwz]]::$Class[[A]] $Variable[[B]];
       ::$Namespace[[abc]]::$Namespace[[bcd]]::$Class[[A]] $Variable[[BB]];
     )cpp"};

From ea36cdcec318578055514a75baf393c66b81d6c2 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Mon, 15 Jul 2019 08:09:21 +0000
Subject: [PATCH 082/451] DeveloperPolicy: fix a typo

llvm-svn: 366046
---
 llvm/docs/DeveloperPolicy.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst
index 003f75fdf5f9b..27abc66f6d00f 100644
--- a/llvm/docs/DeveloperPolicy.rst
+++ b/llvm/docs/DeveloperPolicy.rst
@@ -732,7 +732,7 @@ effort to change licenses, which aims to solve several problems:
 * Some contributions were not submitted to LLVM due to concerns that
   the patent grant required by the project was overly broad.
 * The patent grant was unique to the LLVM Project, not written by a lawyer, and
-  was difficult to determine what was protection was provided (if any).
+  was difficult to determine what protection was provided (if any).
 
 The scope of relicensing is all code that is considered part of the LLVM
 project, including the main LLVM repository, runtime libraries (compiler_rt,

From 17b4a932fae975f9e33863ef834613654eefe9e4 Mon Sep 17 00:00:00 2001
From: Johan Vikstrom <jvikstrom@google.com>
Date: Mon, 15 Jul 2019 08:12:21 +0000
Subject: [PATCH 083/451] [clangd] Added highlighting for members and methods.

Summary: Added highlighting for members and methods.

Reviewers: hokein, sammccall, ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64617

llvm-svn: 366047
---
 .../clangd/SemanticHighlighting.cpp           | 24 +++++++++-
 .../clangd/SemanticHighlighting.h             |  2 +
 .../clangd/test/semantic-highlighting.test    |  8 +++-
 .../unittests/SemanticHighlightingTests.cpp   | 44 +++++++++++++++----
 4 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index 37f3d90ea4aeb..1f8fa9541a6b3 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -40,6 +40,16 @@ class HighlightingTokenCollector
     return true;
   }
 
+  bool VisitMemberExpr(MemberExpr *ME) {
+    const auto *MD = ME->getMemberDecl();
+    if (isa<CXXDestructorDecl>(MD))
+      // When calling the destructor manually like: AAA::~A(); The ~ is a
+      // MemberExpr. Other methods should still be highlighted though.
+      return true;
+    addToken(ME->getMemberLoc(), MD);
+    return true;
+  }
+
   bool VisitNamedDecl(NamedDecl *ND) {
     // UsingDirectiveDecl's namespaces do not show up anywhere else in the
     // Visit/Traverse mehods. But they should also be highlighted as a
@@ -115,6 +125,14 @@ class HighlightingTokenCollector
       addToken(Loc, HighlightingKind::Class);
       return;
     }
+    if (isa<CXXMethodDecl>(D)) {
+      addToken(Loc, HighlightingKind::Method);
+      return;
+    }
+    if (isa<FieldDecl>(D)) {
+      addToken(Loc, HighlightingKind::Field);
+      return;
+    }
     if (isa<EnumDecl>(D)) {
       addToken(Loc, HighlightingKind::Enum);
       return;
@@ -247,8 +265,12 @@ llvm::StringRef toTextMateScope(HighlightingKind Kind) {
   switch (Kind) {
   case HighlightingKind::Function:
     return "entity.name.function.cpp";
+  case HighlightingKind::Method:
+    return "entity.name.function.method.cpp";
   case HighlightingKind::Variable:
-    return "variable.cpp";
+    return "variable.other.cpp";
+  case HighlightingKind::Field:
+    return "variable.other.field.cpp";
   case HighlightingKind::Class:
     return "entity.name.type.class.cpp";
   case HighlightingKind::Enum:
diff --git a/clang-tools-extra/clangd/SemanticHighlighting.h b/clang-tools-extra/clangd/SemanticHighlighting.h
index e375f1be1c179..eaeeb861f9a29 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.h
+++ b/clang-tools-extra/clangd/SemanticHighlighting.h
@@ -26,6 +26,8 @@ namespace clangd {
 enum class HighlightingKind {
   Variable = 0,
   Function,
+  Method,
+  Field,
   Class,
   Enum,
   EnumConstant,
diff --git a/clang-tools-extra/clangd/test/semantic-highlighting.test b/clang-tools-extra/clangd/test/semantic-highlighting.test
index 99f48050bceb7..7de25d1713dc7 100644
--- a/clang-tools-extra/clangd/test/semantic-highlighting.test
+++ b/clang-tools-extra/clangd/test/semantic-highlighting.test
@@ -5,12 +5,18 @@
 # CHECK:      "semanticHighlighting": {
 # CHECK-NEXT:        "scopes": [
 # CHECK-NEXT:          [
-# CHECK-NEXT:            "variable.cpp"
+# CHECK-NEXT:            "variable.other.cpp"
 # CHECK-NEXT:          ],
 # CHECK-NEXT:          [
 # CHECK-NEXT:            "entity.name.function.cpp"
 # CHECK-NEXT:          ],
 # CHECK-NEXT:          [
+# CHECK-NEXT:            "entity.name.function.method.cpp"
+# CHECK-NEXT:          ],
+# CHECK-NEXT:          [
+# CHECK-NEXT:            "variable.other.field.cpp"
+# CHECK-NEXT:          ],
+# CHECK-NEXT:          [
 # CHECK-NEXT:            "entity.name.type.class.cpp"
 # CHECK-NEXT:          ],
 # CHECK-NEXT:          [
diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
index 9ada83864108e..7ba35fee6d85f 100644
--- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -38,7 +38,9 @@ void checkHighlightings(llvm::StringRef Code) {
       {HighlightingKind::Class, "Class"},
       {HighlightingKind::Enum, "Enum"},
       {HighlightingKind::Namespace, "Namespace"},
-      {HighlightingKind::EnumConstant, "EnumConstant"}};
+      {HighlightingKind::EnumConstant, "EnumConstant"},
+      {HighlightingKind::Field, "Field"},
+      {HighlightingKind::Method, "Method"}};
   std::vector<HighlightingToken> ExpectedTokens;
   for (const auto &KindString : KindToString) {
     std::vector<HighlightingToken> Toks = makeHighlightingTokens(
@@ -54,14 +56,14 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
   const char *TestCases[] = {
     R"cpp(
       struct $Class[[AS]] {
-        double SomeMember;
+        double $Field[[SomeMember]];
       };
       struct {
       } $Variable[[S]];
       void $Function[[foo]](int $Variable[[A]], $Class[[AS]] $Variable[[As]]) {
         auto $Variable[[VeryLongVariableName]] = 12312;
         $Class[[AS]]     $Variable[[AA]];
-        auto $Variable[[L]] = $Variable[[AA]].SomeMember + $Variable[[A]];
+        auto $Variable[[L]] = $Variable[[AA]].$Field[[SomeMember]] + $Variable[[A]];
         auto $Variable[[FN]] = [ $Variable[[AA]]](int $Variable[[A]]) -> void {};
         $Variable[[FN]](12312);
       }
@@ -73,19 +75,19 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
         auto $Variable[[Bou]] = $Function[[Gah]];
       }
       struct $Class[[A]] {
-        void $Function[[abc]]();
+        void $Method[[abc]]();
       };
     )cpp",
     R"cpp(
       namespace $Namespace[[abc]] {
         template<typename T>
         struct $Class[[A]] {
-          T t;
+          T $Field[[t]];
         };
       }
       template<typename T>
       struct $Class[[C]] : $Namespace[[abc]]::A<T> {
-        typename T::A* D;
+        typename T::A* $Field[[D]];
       };
       $Namespace[[abc]]::$Class[[A]]<int> $Variable[[AA]];
       typedef $Namespace[[abc]]::$Class[[A]]<int> AAA;
@@ -93,7 +95,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
         $Class[[B]]();
         ~$Class[[B]]();
         void operator<<($Class[[B]]);
-        $Class[[AAA]] AA;
+        $Class[[AAA]] $Field[[AA]];
       };
       $Class[[B]]::$Class[[B]]() {}
       $Class[[B]]::~$Class[[B]]() {}
@@ -112,8 +114,8 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
         $EnumConstant[[Hi]],
       };
       struct $Class[[A]] {
-        $Enum[[E]] EEE;
-        $Enum[[EE]] EEEE;
+        $Enum[[E]] $Field[[EEE]];
+        $Enum[[EE]] $Field[[EEEE]];
       };
       int $Variable[[I]] = $EnumConstant[[Hi]];
       $Enum[[E]] $Variable[[L]] = $Enum[[E]]::$EnumConstant[[B]];
@@ -140,6 +142,30 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
             $Namespace[[vwz]]::$Class[[A]]::$Enum[[B]]::$EnumConstant[[Hi]];
       ::$Namespace[[vwz]]::$Class[[A]] $Variable[[B]];
       ::$Namespace[[abc]]::$Namespace[[bcd]]::$Class[[A]] $Variable[[BB]];
+    )cpp",
+    R"cpp(
+      struct $Class[[D]] {
+        double $Field[[C]];
+      };
+      struct $Class[[A]] {
+        double $Field[[B]];
+        $Class[[D]] $Field[[E]];
+        static double $Variable[[S]];
+        void $Method[[foo]]() {
+          $Field[[B]] = 123;
+          this->$Field[[B]] = 156;
+          this->$Method[[foo]]();
+          $Method[[foo]]();
+          $Variable[[S]] = 90.1;
+        }
+      };
+      void $Function[[foo]]() {
+        $Class[[A]] $Variable[[AA]];
+        $Variable[[AA]].$Field[[B]] += 2;
+        $Variable[[AA]].$Method[[foo]]();
+        $Variable[[AA]].$Field[[E]].$Field[[C]];
+        $Class[[A]]::$Variable[[S]] = 90;
+      }
     )cpp"};
   for (const auto &TestCase : TestCases) {
     checkHighlightings(TestCase);

From 3ed93b4673b98dc58af0d25c4ab33fd2bcf8fca1 Mon Sep 17 00:00:00 2001
From: Serguei Katkov <serguei.katkov@azul.com>
Date: Mon, 15 Jul 2019 08:26:45 +0000
Subject: [PATCH 084/451] [Loop Peeling] Enable peeling for loops with multiple
 exits

This CL enables peeling of loops with multiple exits, where
one exit must be from the latch and all other exits are basic blocks
that end in a call to deopt.

The peeling is enabled under the flag which is false by default.

Reviewers: reames, mkuper, iajbar, fhahn
Reviewed By: reames
Subscribers: xbolva00, hiraditya, zzheng, llvm-commits
Differential Revision: https://reviews.llvm.org/D63923

llvm-svn: 366048
---
 llvm/lib/Transforms/Utils/LoopUnroll.cpp      |  3 +-
 llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp  | 20 +++++
 .../LoopUnroll/peel-loop-pgo-deopt.ll         | 80 +++++++++++++++++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll

diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 3212dec9dde23..e39ade523714a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -402,7 +402,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
     // counts. If we later unroll the loop, we want these to be updated.
     if (Peeled) {
       // According to our guards and profitability checks the only
-      // meaningful exit should be latch block.
+      // meaningful exit should be latch block. Other exits go to deopt,
+      // so we do not worry about them.
       BasicBlock *ExitingBlock = L->getLoopLatch();
       assert(ExitingBlock && "Loop without exiting block?");
       assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?");
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 2cc5bae3b4c35..6394d74f31627 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -61,6 +61,10 @@ static cl::opt<unsigned> UnrollForcePeelCount(
     "unroll-force-peel-count", cl::init(0), cl::Hidden,
     cl::desc("Force a peel count regardless of profiling information."));
 
+static cl::opt<bool> UnrollPeelMultiDeoptExit(
+    "unroll-peel-multi-deopt-exit", cl::init(false), cl::Hidden,
+    cl::desc("Allow peeling of loops with multiple deopt exits."));
+
 // Designates that a Phi is estimated to become invariant after an "infinite"
 // number of loop iterations (i.e. only may become an invariant if the loop is
 // fully unrolled).
@@ -73,6 +77,22 @@ bool llvm::canPeel(Loop *L) {
   if (!L->isLoopSimplifyForm())
     return false;
 
+  if (UnrollPeelMultiDeoptExit) {
+    SmallVector<BasicBlock *, 4> Exits;
+    L->getUniqueNonLatchExitBlocks(Exits);
+
+    if (!Exits.empty()) {
+      // Latch's terminator is a conditional branch, Latch is exiting, and
+      // all non-latch exits end in a deoptimize call.
+      const BasicBlock *Latch = L->getLoopLatch();
+      const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
+      return T && T->isConditional() && L->isLoopExiting(Latch) &&
+             all_of(Exits, [](const BasicBlock *BB) {
+               return BB->getTerminatingDeoptimizeCall();
+             });
+    }
+  }
+
   // Only peel loops that contain a single exit
   if (!L->getExitingBlock() || !L->getUniqueExitBlock())
     return false;
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
new file mode 100644
index 0000000000000..b669904297940
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s
+; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s
+
+; Make sure we use the profile information correctly to peel-off 3 iterations
+; from the loop, and update the branch weights for the peeled loop properly.
+
+; CHECK: Loop Unroll: F[basic]
+; CHECK: PEELING loop %for.body with iteration count 3!
+
+; CHECK-LABEL: @basic
+; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !16
+; CHECK: [[NEXT0]]:
+; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !17
+; CHECK: [[NEXT1]]:
+; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !18
+; CHECK: [[NEXT2]]:
+; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !19
+
+define i32 @basic(i32* %p, i32 %k, i1 %c) #0 !prof !15 {
+entry:
+  %cmp3 = icmp slt i32 0, %k
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %continue ]
+  %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %continue ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
+  store i32 %i.05, i32* %p.addr.04, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %c, label %continue, label %side_exit, !prof !17
+
+continue:
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !16
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %res = phi i32 [ 0, %entry ], [ %inc, %for.cond.for.end_crit_edge ]
+  ret i32 %res
+
+side_exit:
+  %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %inc) ]
+  ret i32 %rval
+}
+
+declare i32 @llvm.experimental.deoptimize.i32(...)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind optsize }
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10}
+!5 = !{!"MaxCount", i64 3}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 3}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 3, i32 2}
+!13 = !{i32 999000, i64 1, i32 10}
+!14 = !{i32 999999, i64 1, i32 10}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 3001, i32 1001}
+!17 = !{!"branch_weights", i32 1, i32 0}
+
+;CHECK: !16 = !{!"branch_weights", i32 900, i32 101}
+;CHECK: !17 = !{!"branch_weights", i32 540, i32 360}
+;CHECK: !18 = !{!"branch_weights", i32 162, i32 378}
+;CHECK: !19 = !{!"branch_weights", i32 1399, i32 162}
+

From 1d554b7441258c8074c912c674f51b1b17625a38 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Mon, 15 Jul 2019 08:48:47 +0000
Subject: [PATCH 085/451] [LoopVectorize] Pass unfiltered list of arguments to
 getIntrinsicInstrCost.

We do not compute the scalarization overhead in getVectorIntrinsicCost
and TTI::getIntrinsicInstrCost requires the full arguments list.

llvm-svn: 366049
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  7 ++---
 .../vector-intrinsic-call-cost.ll             | 30 +++++++++++++++++++
 2 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 22cf9c7db9490..46265e3f3e131 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3149,11 +3149,8 @@ unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
   if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
     FMF = FPMO->getFastMathFlags();
 
-  // Skip operands that do not require extraction/scalarization and do not incur
-  // any overhead.
-  return TTI.getIntrinsicInstrCost(
-      ID, CI->getType(), filterExtractingOperands(CI->arg_operands(), VF), FMF,
-      VF);
+  SmallVector<Value *, 4> Operands(CI->arg_operands());
+  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
diff --git a/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll b/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll
new file mode 100644
index 0000000000000..fce4d56c2e65d
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 %s | FileCheck %s
+
+; CHECK-LABEL: @test_fshl
+; CHECK-LABEL: vector.body:
+; CHECK-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK-NEXT:    %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
+; CHECK-NEXT:    %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    %0 = add i32 %index, 0
+; CHECK-NEXT:    %1 = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> <i16 15, i16 15, i16 15, i16 15>)
+; CHECK-NEXT:    %index.next = add i32 %index, 4
+; CHECK-NEXT:    %2 = icmp eq i32 %index.next, %n.vec
+; CHECK-NEXT:     br i1 %2, label %middle.block, label %vector.body, !llvm.loop !0
+;
+define void @test_fshl(i32 %width) {
+entry:
+  br label %for.body9.us.us
+
+for.cond6.for.cond.cleanup8_crit_edge.us.us:      ; preds = %for.body9.us.us
+  ret void
+
+for.body9.us.us:                                  ; preds = %for.body9.us.us, %entry
+  %x.020.us.us = phi i32 [ 0, %entry ], [ %inc.us.us, %for.body9.us.us ]
+  %conv4.i.us.us = tail call i16 @llvm.fshl.i16(i16 undef, i16 undef, i16 15)
+  %inc.us.us = add nuw i32 %x.020.us.us, 1
+  %exitcond50 = icmp eq i32 %inc.us.us, %width
+  br i1 %exitcond50, label %for.cond6.for.cond.cleanup8_crit_edge.us.us, label %for.body9.us.us
+}
+
+declare i16 @llvm.fshl.i16(i16, i16, i16)

From d021ad9fbeb6d29c8551879f703f45e263e7a700 Mon Sep 17 00:00:00 2001
From: Serguei Katkov <serguei.katkov@azul.com>
Date: Mon, 15 Jul 2019 09:13:11 +0000
Subject: [PATCH 086/451] [Loop Peeling] Fix the bug with IDom setting for exit
 loops

It is possible that a loop exit has two predecessors in the loop body.
In this case, after peeling, the iDom of the exit should be a clone of
the iDom of the original exit, not a clone of a block branching to this exit.

Reviewers: reames, fhahn
Reviewed By: reames
Subscribers: hiraditya, zzheng, llvm-commits
Differential Revision: https://reviews.llvm.org/D64618

llvm-svn: 366050
---
 llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp  | 21 ++++++-
 .../LoopUnroll/peel-loop-pgo-deopt-idom.ll    | 55 +++++++++++++++++++
 2 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll

diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 6394d74f31627..deb38df4420f6 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -583,6 +583,18 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
   L->getExitEdges(ExitEdges);
 
+  DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
+  if (DT) {
+    assert(L->hasDedicatedExits() && "No dedicated exits?");
+    for (auto Edge : ExitEdges) {
+      if (ExitIDom.count(Edge.second))
+        continue;
+      BasicBlock *BB = DT->getNode(Edge.second)->getIDom()->getBlock();
+      assert(L->contains(BB) && "IDom is not in a loop");
+      ExitIDom[Edge.second] = BB;
+    }
+  }
+
   Function *F = Header->getParent();
 
   // Set up all the necessary basic blocks. It is convenient to split the
@@ -675,9 +687,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
       // latter is the first cloned loop body, as original PreHeader dominates
       // the original loop body.
       if (Iter == 0)
-        for (auto Edge : ExitEdges)
-          DT->changeImmediateDominator(Edge.second,
-                                       cast<BasicBlock>(LVMap[Edge.first]));
+        for (auto Exit : ExitIDom)
+          DT->changeImmediateDominator(Exit.first,
+                                       cast<BasicBlock>(LVMap[Exit.second]));
 #ifdef EXPENSIVE_CHECKS
       assert(DT->verify(DominatorTree::VerificationLevel::Fast));
 #endif
@@ -719,6 +731,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   // We modified the loop, update SE.
   SE->forgetTopmostLoop(L);
 
+  // Finally, the DomTree must be correct.
+  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
+
   // FIXME: Incrementally update loop-simplify
   simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
 
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll
new file mode 100644
index 0000000000000..ab3488c811077
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt-idom.ll
@@ -0,0 +1,55 @@
+; REQUIRES: asserts
+; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s
+; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s
+
+; Regression test for setting the correct idom for exit blocks.
+
+; CHECK: Loop Unroll: F[basic]
+; CHECK: PEELING loop %for.body with iteration count 1!
+
+define i32 @basic(i32* %p, i32 %k, i1 %c1, i1 %c2) #0 !prof !3 {
+entry:
+  %cmp3 = icmp slt i32 0, %k
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %latch
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %latch ]
+  %p.addr.04 = phi i32* [ %p, %for.body.lr.ph ], [ %incdec.ptr, %latch ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.04, i32 1
+  store i32 %i.05, i32* %p.addr.04, align 4
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %c1, label %continue, label %to_side_exit
+
+continue:
+  br i1 %c2, label %latch, label %side_exit, !prof !2
+
+latch:
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !prof !1
+
+for.cond.for.end_crit_edge:                       ; preds = %latch
+  br label %for.end
+
+to_side_exit:
+  br i1 %c2, label %continue, label %side_exit, !prof !2
+
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %res = phi i32 [ 0, %entry ], [ %inc, %for.cond.for.end_crit_edge ]
+  ret i32 %res
+
+side_exit:
+  %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %inc) ]
+  ret i32 %rval
+}
+
+declare i32 @llvm.experimental.deoptimize.i32(...)
+
+attributes #0 = { nounwind }
+
+!1 = !{!"branch_weights", i32 1, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 0}
+!3 = !{!"function_entry_count", i64 1}

From da750b1688fb82ca28d89d2dbe08784ed16f978c Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Mon, 15 Jul 2019 10:44:50 +0000
Subject: [PATCH 087/451] [ARM] Adjust how NEON shifts are lowered

This adjusts the way that we lower NEON shifts to use a DAG target node, not
via a neon intrinsic. This is useful for handling MVE shift operations in the
same way. It also renames some of the immediate shift nodes for
consistency, and moves some of the processing of immediate shifts into
LowerShift allowing it to capture more cases.

Differential Revision: https://reviews.llvm.org/D64426

llvm-svn: 366051
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 251 +++++++++++++-----------
 llvm/lib/Target/ARM/ARMISelLowering.h   |  38 ++--
 llvm/lib/Target/ARM/ARMInstrNEON.td     | 216 ++++++++++++--------
 llvm/test/CodeGen/ARM/vpadd.ll          |  47 +++--
 llvm/test/CodeGen/ARM/vuzp.ll           |  46 ++---
 5 files changed, 340 insertions(+), 258 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5e2de61e288f9..5773c3ba04ed3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1501,23 +1501,25 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
   case ARMISD::VTST:          return "ARMISD::VTST";
 
-  case ARMISD::VSHL:          return "ARMISD::VSHL";
-  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
-  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
-  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
-  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
-  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
-  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
-  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
-  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
-  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
-  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
-  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
-  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
-  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
-  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
-  case ARMISD::VSLI:          return "ARMISD::VSLI";
-  case ARMISD::VSRI:          return "ARMISD::VSRI";
+  case ARMISD::VSHLs:         return "ARMISD::VSHLs";
+  case ARMISD::VSHLu:         return "ARMISD::VSHLu";
+  case ARMISD::VSHLIMM:       return "ARMISD::VSHLIMM";
+  case ARMISD::VSHRsIMM:      return "ARMISD::VSHRsIMM";
+  case ARMISD::VSHRuIMM:      return "ARMISD::VSHRuIMM";
+  case ARMISD::VRSHRsIMM:     return "ARMISD::VRSHRsIMM";
+  case ARMISD::VRSHRuIMM:     return "ARMISD::VRSHRuIMM";
+  case ARMISD::VRSHRNIMM:     return "ARMISD::VRSHRNIMM";
+  case ARMISD::VQSHLsIMM:     return "ARMISD::VQSHLsIMM";
+  case ARMISD::VQSHLuIMM:     return "ARMISD::VQSHLuIMM";
+  case ARMISD::VQSHLsuIMM:    return "ARMISD::VQSHLsuIMM";
+  case ARMISD::VQSHRNsIMM:    return "ARMISD::VQSHRNsIMM";
+  case ARMISD::VQSHRNuIMM:    return "ARMISD::VQSHRNuIMM";
+  case ARMISD::VQSHRNsuIMM:   return "ARMISD::VQSHRNsuIMM";
+  case ARMISD::VQRSHRNsIMM:   return "ARMISD::VQRSHRNsIMM";
+  case ARMISD::VQRSHRNuIMM:   return "ARMISD::VQRSHRNuIMM";
+  case ARMISD::VQRSHRNsuIMM:  return "ARMISD::VQRSHRNsuIMM";
+  case ARMISD::VSLIIMM:       return "ARMISD::VSLIIMM";
+  case ARMISD::VSRIIMM:       return "ARMISD::VSRIIMM";
   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
@@ -5136,7 +5138,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
                                DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
     if (VT == MVT::f64)
-      Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+      Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
                          DAG.getConstant(32, dl, MVT::i32));
     else /*if (VT == MVT::f32)*/
@@ -5144,11 +5146,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
     if (SrcVT == MVT::f32) {
       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
       if (VT == MVT::f64)
-        Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+        Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
                            DAG.getConstant(32, dl, MVT::i32));
     } else if (VT == MVT::f32)
-      Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+      Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
                          DAG.getConstant(32, dl, MVT::i32));
     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
@@ -5653,40 +5655,99 @@ static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
   return Res;
 }
 
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+  // Ignore bit_converts.
+  while (Op.getOpcode() == ISD::BITCAST)
+    Op = Op.getOperand(0);
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (!BVN ||
+      !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+                            ElementBits) ||
+      SplatBitSize > ElementBits)
+    return false;
+  Cnt = SplatBits.getSExtValue();
+  return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation.  That value must be in the range:
+///   0 <= Value < ElementBits for a left shift; or
+///   0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+  assert(VT.isVector() && "vector shift count is not a vector type");
+  int64_t ElementBits = VT.getScalarSizeInBits();
+  if (!getVShiftImm(Op, ElementBits, Cnt))
+    return false;
+  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation.  For a shift opcode, the value
+/// is positive, but for an intrinsic the value count must be negative. The
+/// absolute value must be in the range:
+///   1 <= |Value| <= ElementBits for a right shift; or
+///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+                         int64_t &Cnt) {
+  assert(VT.isVector() && "vector shift count is not a vector type");
+  int64_t ElementBits = VT.getScalarSizeInBits();
+  if (!getVShiftImm(Op, ElementBits, Cnt))
+    return false;
+  if (!isIntrinsic)
+    return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
+  if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
+    Cnt = -Cnt;
+    return true;
+  }
+  return false;
+}
+
 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                           const ARMSubtarget *ST) {
   EVT VT = N->getValueType(0);
   SDLoc dl(N);
+  int64_t Cnt;
 
   if (!VT.isVector())
     return SDValue();
 
-  // Lower vector shifts on NEON to use VSHL.
-  assert(ST->hasNEON() && "unexpected vector shift");
+  // We essentially have two forms here. Shift by an immediate and shift by a
+  // vector register. We cannot easily match shift by an immediate in tablegen
+  // so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.  For shifting
+  // by a vector, we don't have VSHR, only VSHL (which can be signed or
+  // unsigned, and a negative shift indicates a shift right).
+  if (N->getOpcode() == ISD::SHL) {
+    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+      return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
+                         DAG.getConstant(Cnt, dl, MVT::i32));
+    return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
+                       N->getOperand(1));
+  }
 
-  // Left shifts translate directly to the vshiftu intrinsic.
-  if (N->getOpcode() == ISD::SHL)
-    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
-                                       MVT::i32),
-                       N->getOperand(0), N->getOperand(1));
+  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
+         "unexpected vector shift opcode");
 
-  assert((N->getOpcode() == ISD::SRA ||
-          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+  if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+    unsigned VShiftOpc =
+        (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
+    return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
+                       DAG.getConstant(Cnt, dl, MVT::i32));
+  }
 
-  // NEON uses the same intrinsics for both left and right shifts.  For
-  // right shifts, the shift amounts are negative, so negate the vector of
-  // shift amounts.
+  // Other right shifts we don't have operations for (we use a shift left by a
+  // negative number).
   EVT ShiftVT = N->getOperand(1).getValueType();
-  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
-                                     getZeroVector(ShiftVT, DAG, dl),
-                                     N->getOperand(1));
-  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
-                             Intrinsic::arm_neon_vshifts :
-                             Intrinsic::arm_neon_vshiftu);
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
-                     DAG.getConstant(vshiftInt, dl, MVT::i32),
-                     N->getOperand(0), NegatedCount);
+  SDValue NegatedCount = DAG.getNode(
+      ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
+  unsigned VShiftOpc =
+      (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
+  return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
 }
 
 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
@@ -12574,58 +12635,6 @@ static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
                      ConvInput, DAG.getConstant(C, dl, MVT::i32));
 }
 
-/// Getvshiftimm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift operation, where all the elements of the
-/// build_vector must have the same constant integer value.
-static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
-  // Ignore bit_converts.
-  while (Op.getOpcode() == ISD::BITCAST)
-    Op = Op.getOperand(0);
-  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
-  APInt SplatBits, SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
-                                      HasAnyUndefs, ElementBits) ||
-      SplatBitSize > ElementBits)
-    return false;
-  Cnt = SplatBits.getSExtValue();
-  return true;
-}
-
-/// isVShiftLImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift left operation.  That value must be in the range:
-///   0 <= Value < ElementBits for a left shift; or
-///   0 <= Value <= ElementBits for a long left shift.
-static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
-  assert(VT.isVector() && "vector shift count is not a vector type");
-  int64_t ElementBits = VT.getScalarSizeInBits();
-  if (! getVShiftImm(Op, ElementBits, Cnt))
-    return false;
-  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
-}
-
-/// isVShiftRImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift right operation.  For a shift opcode, the value
-/// is positive, but for an intrinsic the value count must be negative. The
-/// absolute value must be in the range:
-///   1 <= |Value| <= ElementBits for a right shift; or
-///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
-                         int64_t &Cnt) {
-  assert(VT.isVector() && "vector shift count is not a vector type");
-  int64_t ElementBits = VT.getScalarSizeInBits();
-  if (! getVShiftImm(Op, ElementBits, Cnt))
-    return false;
-  if (!isIntrinsic)
-    return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
-  if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
-    Cnt = -Cnt;
-    return true;
-  }
-  return false;
-}
-
 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
@@ -12661,12 +12670,12 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
     case Intrinsic::arm_neon_vshifts:
     case Intrinsic::arm_neon_vshiftu:
       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
-        VShiftOpc = ARMISD::VSHL;
+        VShiftOpc = ARMISD::VSHLIMM;
         break;
       }
       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
-        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
-                     ARMISD::VSHRs : ARMISD::VSHRu);
+        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
+                                                          : ARMISD::VSHRuIMM);
         break;
       }
       return SDValue();
@@ -12711,29 +12720,41 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
       // Opcode already set above.
       break;
     case Intrinsic::arm_neon_vrshifts:
-      VShiftOpc = ARMISD::VRSHRs; break;
+      VShiftOpc = ARMISD::VRSHRsIMM;
+      break;
     case Intrinsic::arm_neon_vrshiftu:
-      VShiftOpc = ARMISD::VRSHRu; break;
+      VShiftOpc = ARMISD::VRSHRuIMM;
+      break;
     case Intrinsic::arm_neon_vrshiftn:
-      VShiftOpc = ARMISD::VRSHRN; break;
+      VShiftOpc = ARMISD::VRSHRNIMM;
+      break;
     case Intrinsic::arm_neon_vqshifts:
-      VShiftOpc = ARMISD::VQSHLs; break;
+      VShiftOpc = ARMISD::VQSHLsIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftu:
-      VShiftOpc = ARMISD::VQSHLu; break;
+      VShiftOpc = ARMISD::VQSHLuIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftsu:
-      VShiftOpc = ARMISD::VQSHLsu; break;
+      VShiftOpc = ARMISD::VQSHLsuIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftns:
-      VShiftOpc = ARMISD::VQSHRNs; break;
+      VShiftOpc = ARMISD::VQSHRNsIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftnu:
-      VShiftOpc = ARMISD::VQSHRNu; break;
+      VShiftOpc = ARMISD::VQSHRNuIMM;
+      break;
     case Intrinsic::arm_neon_vqshiftnsu:
-      VShiftOpc = ARMISD::VQSHRNsu; break;
+      VShiftOpc = ARMISD::VQSHRNsuIMM;
+      break;
     case Intrinsic::arm_neon_vqrshiftns:
-      VShiftOpc = ARMISD::VQRSHRNs; break;
+      VShiftOpc = ARMISD::VQRSHRNsIMM;
+      break;
     case Intrinsic::arm_neon_vqrshiftnu:
-      VShiftOpc = ARMISD::VQRSHRNu; break;
+      VShiftOpc = ARMISD::VQRSHRNuIMM;
+      break;
     case Intrinsic::arm_neon_vqrshiftnsu:
-      VShiftOpc = ARMISD::VQRSHRNsu; break;
+      VShiftOpc = ARMISD::VQRSHRNsuIMM;
+      break;
     }
 
     SDLoc dl(N);
@@ -12747,9 +12768,9 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
     unsigned VShiftOpc = 0;
 
     if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
-      VShiftOpc = ARMISD::VSLI;
+      VShiftOpc = ARMISD::VSLIIMM;
     else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
-      VShiftOpc = ARMISD::VSRI;
+      VShiftOpc = ARMISD::VSRIIMM;
     else {
       llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
     }
@@ -12840,7 +12861,7 @@ static SDValue PerformShiftCombine(SDNode *N,
   case ISD::SHL:
     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
       SDLoc dl(N);
-      return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
+      return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
                          DAG.getConstant(Cnt, dl, MVT::i32));
     }
     break;
@@ -12848,8 +12869,8 @@ static SDValue PerformShiftCombine(SDNode *N,
   case ISD::SRA:
   case ISD::SRL:
     if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
-      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
-                            ARMISD::VSHRs : ARMISD::VSHRu);
+      unsigned VShiftOpc =
+          (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
       SDLoc dl(N);
       return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
                          DAG.getConstant(Cnt, dl, MVT::i32));
@@ -13619,7 +13640,7 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
 
   SDNode *U = *ExtVal->use_begin();
   if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
-       U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+       U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
     return false;
 
   return true;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 4f9f33e98546a..1675ec59a3541 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -139,32 +139,36 @@ class VectorType;
       VCGTU,        // Vector compare unsigned greater than.
       VTST,         // Vector test bits.
 
+      // Vector shift by vector
+      VSHLs,        // ...left/right by signed
+      VSHLu,        // ...left/right by unsigned
+
       // Vector shift by immediate:
-      VSHL,         // ...left
-      VSHRs,        // ...right (signed)
-      VSHRu,        // ...right (unsigned)
+      VSHLIMM,      // ...left
+      VSHRsIMM,     // ...right (signed)
+      VSHRuIMM,     // ...right (unsigned)
 
       // Vector rounding shift by immediate:
-      VRSHRs,       // ...right (signed)
-      VRSHRu,       // ...right (unsigned)
-      VRSHRN,       // ...right narrow
+      VRSHRsIMM,    // ...right (signed)
+      VRSHRuIMM,    // ...right (unsigned)
+      VRSHRNIMM,    // ...right narrow
 
       // Vector saturating shift by immediate:
-      VQSHLs,       // ...left (signed)
-      VQSHLu,       // ...left (unsigned)
-      VQSHLsu,      // ...left (signed to unsigned)
-      VQSHRNs,      // ...right narrow (signed)
-      VQSHRNu,      // ...right narrow (unsigned)
-      VQSHRNsu,     // ...right narrow (signed to unsigned)
+      VQSHLsIMM,    // ...left (signed)
+      VQSHLuIMM,    // ...left (unsigned)
+      VQSHLsuIMM,   // ...left (signed to unsigned)
+      VQSHRNsIMM,   // ...right narrow (signed)
+      VQSHRNuIMM,   // ...right narrow (unsigned)
+      VQSHRNsuIMM,  // ...right narrow (signed to unsigned)
 
       // Vector saturating rounding shift by immediate:
-      VQRSHRNs,     // ...right narrow (signed)
-      VQRSHRNu,     // ...right narrow (unsigned)
-      VQRSHRNsu,    // ...right narrow (signed to unsigned)
+      VQRSHRNsIMM,  // ...right narrow (signed)
+      VQRSHRNuIMM,  // ...right narrow (unsigned)
+      VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
 
       // Vector shift and insert:
-      VSLI,         // ...left
-      VSRI,         // ...right
+      VSLIIMM,      // ...left
+      VSRIIMM,      // ...right
 
       // Vector get lane (VMOV scalar to ARM core register)
       // (These are used for 8- and 16-bit element types only.)
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index cd7b75b268f1d..64d949f79e010 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -493,38 +493,45 @@ def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
 def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
 def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
 
+// Vector Shifts
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                     SDTCisSameAs<0, 2>,]>;
+
+def NEONvshls        : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
+def NEONvshlu        : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
+
 // Types for vector shift by immediates.  The "SHX" version is for long and
 // narrow operations where the source and destination vectors have different
 // types.  The "SHINS" version is for shift and insert operations.
-def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
-                                         SDTCisVT<2, i32>]>;
-def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
-                                         SDTCisVT<2, i32>]>;
-def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
-                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
-
-def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
-def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
-def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
-def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
-
-def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
-def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
-def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
-
-def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
-def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
-def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
-def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
-def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
-def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
-
-def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
-def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
-def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
-
-def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
-def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+def SDTARMVSHIMM     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                            SDTCisVT<2, i32>]>;
+def SDTARMVSHXIMM    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+                                            SDTCisVT<2, i32>]>;
+def SDTARMVSHINSIMM  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                            SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def NEONvshlImm      : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
+def NEONvshrsImm     : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
+def NEONvshruImm     : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
+def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
+
+def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
+def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
+def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;
+
+def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
+def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
+def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
+def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
+def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
+def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;
+
+def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
+def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
+def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
+
+def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
+def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
 
 def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                            SDTCisVT<2, i32>]>;
@@ -4097,72 +4104,72 @@ multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                           string OpcodeStr> {
   // 64-bit vector types.
   def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
                              // imm6 = xxxxxx
 
   // 128-bit vector types.
   def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
-                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
                              // imm6 = xxxxxx
 }
 multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                           string OpcodeStr> {
   // 64-bit vector types.
   def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
-                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
-                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
-                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
-                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
                              // imm6 = xxxxxx
 
   // 128-bit vector types.
   def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
-                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
     let Inst{21-19} = 0b001; // imm6 = 001xxx
   }
   def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
-                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
     let Inst{21-20} = 0b01;  // imm6 = 01xxxx
   }
   def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
-                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
     let Inst{21} = 0b1;      // imm6 = 1xxxxx
   }
   def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
-                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
                              // imm6 = xxxxxx
 }
 
@@ -4262,11 +4269,11 @@ defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                             int_arm_neon_vraddhn, 1>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+def : Pat<(v8i8  (trunc (NEONvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
           (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (NEONvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
           (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (NEONvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
           (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
 }
 
@@ -5020,11 +5027,11 @@ defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                             int_arm_neon_vrsubhn, 0>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i8  (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+def : Pat<(v8i8  (trunc (NEONvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
           (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (NEONvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
           (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (NEONvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
           (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
 }
 
@@ -5515,7 +5522,7 @@ def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
 
 def abd_shr :
     PatFrag<(ops node:$in1, node:$in2, node:$shift),
-            (NEONvshrs (sub (zext node:$in1),
+            (NEONvshrsImm (sub (zext node:$in1),
                             (zext node:$in2)), (i32 $shift))>;
 
 let Predicates = [HasNEON] in {
@@ -5782,20 +5789,57 @@ defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                             IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                             "vshl", "u", int_arm_neon_vshiftu>;
 
+let Predicates = [HasNEON] in {
+def : Pat<(v8i8 (NEONvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v4i16 (NEONvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v2i32 (NEONvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v1i64 (NEONvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v16i8 (NEONvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v8i16 (NEONvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v4i32 (NEONvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v2i64 (NEONvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
+
+def : Pat<(v8i8 (NEONvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v4i16 (NEONvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v2i32 (NEONvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v1i64 (NEONvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v16i8 (NEONvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v8i16 (NEONvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v4i32 (NEONvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v2i64 (NEONvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
+
+}
+
 //   VSHL     : Vector Shift Left (Immediate)
-defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshlImm>;
 
 //   VSHR     : Vector Shift Right (Immediate)
 defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
-                            NEONvshrs>;
+                            NEONvshrsImm>;
 defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
-                            NEONvshru>;
+                            NEONvshruImm>;
 
 //   VSHLL    : Vector Shift Left Long
 defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
-  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
+  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (sext node:$LHS), node:$RHS)>>;
 defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
-  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;
+  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (zext node:$LHS), node:$RHS)>>;
 
 //   VSHLL    : Vector Shift Left Long (with maximum shift count)
 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
@@ -5814,37 +5858,37 @@ def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                           v2i64, v2i32, imm32>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (NEONvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (NEONvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (NEONvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (NEONvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (NEONvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (NEONvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (NEONvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (NEONvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (NEONvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
 }
 
 //   VSHRN    : Vector Shift Right and Narrow
 defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                            PatFrag<(ops node:$Rn, node:$amt),
-                                   (trunc (NEONvshrs node:$Rn, node:$amt))>>;
+                                   (trunc (NEONvshrsImm node:$Rn, node:$amt))>>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
+def : Pat<(v8i8 (trunc (NEONvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
           (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
-def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
+def : Pat<(v4i16 (trunc (NEONvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
           (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
-def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
+def : Pat<(v2i32 (trunc (NEONvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
           (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
 }
 
@@ -5857,13 +5901,13 @@ defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                             "vrshl", "u", int_arm_neon_vrshiftu>;
 //   VRSHR    : Vector Rounding Shift Right
 defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
-                            NEONvrshrs>;
+                            NEONvrshrsImm>;
 defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
-                            NEONvrshru>;
+                            NEONvrshruImm>;
 
 //   VRSHRN   : Vector Rounding Shift Right and Narrow
 defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
-                           NEONvrshrn>;
+                           NEONvrshrnImm>;
 
 //   VQSHL    : Vector Saturating Shift
 defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
@@ -5873,21 +5917,21 @@ defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                             IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                             "vqshl", "u", int_arm_neon_vqshiftu>;
 //   VQSHL    : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
-defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
+defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
 
 //   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
 
 //   VQSHRN   : Vector Saturating Shift Right and Narrow
 defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
-                           NEONvqshrns>;
+                           NEONvqshrnsImm>;
 defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
-                           NEONvqshrnu>;
+                           NEONvqshrnuImm>;
 
 //   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
 defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
-                           NEONvqshrnsu>;
+                           NEONvqshrnsuImm>;
 
 //   VQRSHL   : Vector Saturating Rounding Shift
 defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
@@ -5899,20 +5943,20 @@ defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
 
 //   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
 defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
-                           NEONvqrshrns>;
+                           NEONvqrshrnsImm>;
 defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
-                           NEONvqrshrnu>;
+                           NEONvqrshrnuImm>;
 
 //   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
 defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
-                           NEONvqrshrnsu>;
+                           NEONvqrshrnsuImm>;
 
 //   VSRA     : Vector Shift Right and Accumulate
-defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
-defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrsImm>;
+defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshruImm>;
 //   VRSRA    : Vector Rounding Shift Right and Accumulate
-defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
-defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
+defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
 
 //   VSLI     : Vector Shift Left and Insert
 defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
diff --git a/llvm/test/CodeGen/ARM/vpadd.ll b/llvm/test/CodeGen/ARM/vpadd.ll
index 731bc373aaa61..72c3da298cf3e 100644
--- a/llvm/test/CodeGen/ARM/vpadd.ll
+++ b/llvm/test/CodeGen/ARM/vpadd.ll
@@ -285,17 +285,14 @@ define void @addCombineToVPADDLq_s8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ss
 define void @addCombineToVPADDL_s8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {
 ; CHECK-LABEL: addCombineToVPADDL_s8:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov.i16	d16, #0x8
-; CHECK-NEXT:    vld1.64	{d18, d19}, [r0]
-; CHECK-NEXT:    vext.8	d17, d18, d16, #1
-; CHECK-NEXT:    vneg.s16	d16, d16
-; CHECK-NEXT:    vshl.i16	d18, d18, #8
-; CHECK-NEXT:    vshl.i16	d17, d17, #8
-; CHECK-NEXT:    vshl.s16	d18, d18, d16
-; CHECK-NEXT:    vshl.s16	d16, d17, d16
-; CHECK-NEXT:    vadd.i16	d16, d16, d18
-; CHECK-NEXT:    vstr	d16, [r1]
-; CHECK-NEXT:    mov	pc, lr
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT:    vext.8 d18, d16, d16, #1
+; CHECK-NEXT:    vshl.i16 d16, d16, #8
+; CHECK-NEXT:    vshl.i16 d18, d18, #8
+; CHECK-NEXT:    vshr.s16 d17, d18, #8
+; CHECK-NEXT:    vsra.s16 d17, d16, #8
+; CHECK-NEXT:    vstr d17, [r1]
+; CHECK-NEXT:    mov pc, lr
   %tmp = load <16 x i8>, <16 x i8>* %cbcr
   %tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -488,7 +485,19 @@ define <2 x i16> @fromExtendingExtractVectorElt_i16(<4 x i16> %in) {
 ; And <2 x i8> to <2 x i32>
 define <2 x i8> @fromExtendingExtractVectorElt_2i8(<8 x i8> %in) {
 ; CHECK-LABEL: fromExtendingExtractVectorElt_2i8:
-; CHECK:    vadd.i32
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vmov.u8 r1, d16[1]
+; CHECK-NEXT:    vmov.u8 r0, d16[0]
+; CHECK-NEXT:    vmov.u8 r2, d16[2]
+; CHECK-NEXT:    vmov.u8 r3, d16[3]
+; CHECK-NEXT:    vmov.32 d17[0], r1
+; CHECK-NEXT:    vmov.32 d16[0], r0
+; CHECK-NEXT:    vmov.32 d17[1], r3
+; CHECK-NEXT:    vmov.32 d16[1], r2
+; CHECK-NEXT:    vadd.i32 d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    mov pc, lr
   %tmp1 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> <i32 0, i32 2>
   %tmp2 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> <i32 1, i32 3>
   %x = add <2 x i8> %tmp2, %tmp1
@@ -497,7 +506,19 @@ define <2 x i8> @fromExtendingExtractVectorElt_2i8(<8 x i8> %in) {
 
 define <2 x i16> @fromExtendingExtractVectorElt_2i16(<8 x i16> %in) {
 ; CHECK-LABEL: fromExtendingExtractVectorElt_2i16:
-; CHECK:    vadd.i32
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vmov.u16 r0, d16[0]
+; CHECK-NEXT:    vmov.u16 r1, d16[1]
+; CHECK-NEXT:    vmov.u16 r3, d16[3]
+; CHECK-NEXT:    vmov.u16 r2, d16[2]
+; CHECK-NEXT:    vmov.32 d16[0], r0
+; CHECK-NEXT:    vmov.32 d17[0], r1
+; CHECK-NEXT:    vmov.32 d16[1], r2
+; CHECK-NEXT:    vmov.32 d17[1], r3
+; CHECK-NEXT:    vadd.i32 d16, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    mov pc, lr
  %tmp1 = shufflevector <8 x i16> %in, <8 x i16> undef, <2 x i32> <i32 0, i32 2>
  %tmp2 = shufflevector <8 x i16> %in, <8 x i16> undef, <2 x i32> <i32 1, i32 3>
  %x = add <2 x i16> %tmp2, %tmp1
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 84bb78243cce6..6a48f0cf44983 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -360,16 +360,14 @@ define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
 ; CHECK-NEXT:    vld1.64 {d18, d19}, [lr]
 ; CHECK-NEXT:    vcgt.u32 q8, q9, q8
 ; CHECK-NEXT:    vld1.32 {d18[0]}, [r12:32]
-; CHECK-NEXT:    vmov.i8 d19, #0x7
-; CHECK-NEXT:    vmovl.u8 q10, d18
+; CHECK-NEXT:    vmovl.u8 q9, d18
 ; CHECK-NEXT:    vmovn.i32 d16, q8
-; CHECK-NEXT:    vneg.s8 d17, d19
-; CHECK-NEXT:    vmov d18, r2, r3
-; CHECK-NEXT:    vuzp.8 d16, d20
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vuzp.8 d16, d18
+; CHECK-NEXT:    vmov d18, r0, r1
 ; CHECK-NEXT:    vshl.i8 d16, d16, #7
-; CHECK-NEXT:    vshl.s8 d16, d16, d17
-; CHECK-NEXT:    vmov d17, r0, r1
-; CHECK-NEXT:    vbsl d16, d17, d18
+; CHECK-NEXT:    vshr.s8 d16, d16, #7
+; CHECK-NEXT:    vbsl d16, d18, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    pop {r11, lr}
 ; CHECK-NEXT:    mov pc, lr
@@ -392,15 +390,13 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1
 ; CHECK-NEXT:    add r12, sp, #16
 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r12]
 ; CHECK-NEXT:    vcgt.u32 q8, q9, q8
-; CHECK-NEXT:    vmov.i8 d18, #0x7
+; CHECK-NEXT:    vmov d18, r0, r1
 ; CHECK-NEXT:    vmovn.i32 d16, q8
 ; CHECK-NEXT:    vuzp.8 d16, d17
-; CHECK-NEXT:    vneg.s8 d17, d18
+; CHECK-NEXT:    vmov d17, r2, r3
 ; CHECK-NEXT:    vshl.i8 d16, d16, #7
-; CHECK-NEXT:    vmov d18, r2, r3
-; CHECK-NEXT:    vshl.s8 d16, d16, d17
-; CHECK-NEXT:    vmov d17, r0, r1
-; CHECK-NEXT:    vbsl d16, d17, d18
+; CHECK-NEXT:    vshr.s8 d16, d16, #7
+; CHECK-NEXT:    vbsl d16, d18, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    mov pc, lr
                          <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
@@ -421,15 +417,13 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
 ; CHECK-NEXT:    vld1.64 {d18, d19}, [r12]
 ; CHECK-NEXT:    vcgt.u32 q8, q9, q8
 ; CHECK-NEXT:    vldr d18, .LCPI22_0
-; CHECK-NEXT:    vmov.i8 d19, #0x7
 ; CHECK-NEXT:    vmovn.i32 d16, q8
 ; CHECK-NEXT:    vtbl.8 d16, {d16}, d18
-; CHECK-NEXT:    vneg.s8 d17, d19
-; CHECK-NEXT:    vmov d18, r2, r3
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d18, r0, r1
 ; CHECK-NEXT:    vshl.i8 d16, d16, #7
-; CHECK-NEXT:    vshl.s8 d16, d16, d17
-; CHECK-NEXT:    vmov d17, r0, r1
-; CHECK-NEXT:    vbsl d16, d17, d18
+; CHECK-NEXT:    vshr.s8 d16, d16, #7
+; CHECK-NEXT:    vbsl d16, d18, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    mov pc, lr
 ; CHECK-NEXT:    .p2align 3
@@ -487,20 +481,18 @@ define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
 ; CHECK-NEXT:    vcgt.u32 q8, q9, q8
 ; CHECK-NEXT:    vmovn.i32 d19, q10
 ; CHECK-NEXT:    vmov.u8 lr, d23[3]
-; CHECK-NEXT:    vldr d20, .LCPI23_0
 ; CHECK-NEXT:    vmovn.i32 d18, q8
 ; CHECK-NEXT:    vmovn.i16 d22, q9
-; CHECK-NEXT:    vmov.i8 q9, #0x7
-; CHECK-NEXT:    vneg.s8 q9, q9
+; CHECK-NEXT:    vldr d18, .LCPI23_0
 ; CHECK-NEXT:    vmov.8 d17[0], lr
-; CHECK-NEXT:    vtbl.8 d16, {d22, d23}, d20
+; CHECK-NEXT:    vtbl.8 d16, {d22, d23}, d18
+; CHECK-NEXT:    vmov d19, r2, r3
 ; CHECK-NEXT:    vld1.8 {d17[1]}, [r4]
 ; CHECK-NEXT:    add r4, sp, #8
+; CHECK-NEXT:    vmov d18, r0, r1
 ; CHECK-NEXT:    vshl.i8 q8, q8, #7
 ; CHECK-NEXT:    vld1.64 {d20, d21}, [r4]
-; CHECK-NEXT:    vshl.s8 q8, q8, q9
-; CHECK-NEXT:    vmov d19, r2, r3
-; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vshr.s8 q8, q8, #7
 ; CHECK-NEXT:    vbsl q8, q9, q10
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17

From 309246e4e2fe835051e4cf9cd23533918ad2335a Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Mon, 15 Jul 2019 10:50:03 +0000
Subject: [PATCH 088/451] [obj2yaml] - Rework tool's error reporting logic for
 ELF target.

ELF.h contains two getSymbol methods
which seem to be used only by obj2yaml.

One of these methods calls the other, which in turn
contains an untested error message that doesn't
provide enough information.

The problem is that after improving just that message,
obj2yaml will still not show it
(the "Error reading file: yaml: Invalid data was
encountered while parsing the file" message will be shown instead),
because the tool's internal error handling is based on the ErrorOr<> class, which
stores an error code and as a result can only show a predefined error string, which
isn't actually very useful.

In this patch, I rework obj2yaml's error reporting system
for ELF targets to use the Error and Expected<> classes.
Also, I improve the error message produced
by getSymbol to demonstrate the new functionality.

Differential revision: https://reviews.llvm.org/D64631

llvm-svn: 366052
---
 llvm/include/llvm/Object/ELF.h                |  23 +-
 llvm/test/tools/obj2yaml/section-group.test   |  24 +-
 .../obj2yaml/special-symbol-indices.yaml      |   2 +-
 llvm/tools/obj2yaml/elf2yaml.cpp              | 319 +++++++++---------
 llvm/tools/obj2yaml/obj2yaml.cpp              |  13 +-
 llvm/tools/obj2yaml/obj2yaml.h                |   2 +-
 6 files changed, 199 insertions(+), 184 deletions(-)

diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 8fe9f2919c5b4..7bc6dc4620c7d 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -354,22 +354,19 @@ ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols,
   return getSection(Index);
 }
 
-template <class ELFT>
-inline Expected<const typename ELFT::Sym *>
-getSymbol(typename ELFT::SymRange Symbols, uint32_t Index) {
-  if (Index >= Symbols.size())
-    // TODO: this error is untested.
-    return createError("invalid symbol index");
-  return &Symbols[Index];
-}
-
 template <class ELFT>
 Expected<const typename ELFT::Sym *>
 ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
-  auto SymtabOrErr = symbols(Sec);
-  if (!SymtabOrErr)
-    return SymtabOrErr.takeError();
-  return object::getSymbol<ELFT>(*SymtabOrErr, Index);
+  auto SymsOrErr = symbols(Sec);
+  if (!SymsOrErr)
+    return SymsOrErr.takeError();
+
+  Elf_Sym_Range Symbols = *SymsOrErr;
+  if (Index >= Symbols.size())
+    return createError("unable to get symbol from section " +
+                       getSecIndexForError(this, Sec) +
+                       ": invalid symbol index (" + Twine(Index) + ")");
+  return &Symbols[Index];
 }
 
 template <class ELFT>
diff --git a/llvm/test/tools/obj2yaml/section-group.test b/llvm/test/tools/obj2yaml/section-group.test
index 78af00cd138b3..cd520cb1b361f 100644
--- a/llvm/test/tools/obj2yaml/section-group.test
+++ b/llvm/test/tools/obj2yaml/section-group.test
@@ -1,6 +1,6 @@
 ## Checks that the tool is able to read section groups from ELF.
 
-# RUN: yaml2obj %s > %t1.o
+# RUN: yaml2obj --docnum=1 %s > %t1.o
 # RUN: llvm-readobj --elf-section-groups %t1.o | FileCheck %s -check-prefix=OBJ
 # RUN: obj2yaml %t1.o | FileCheck %s --check-prefix YAML
 
@@ -46,3 +46,25 @@ Symbols:
   - Name:    signature
     Type:    STT_OBJECT
     Section: .rodata
+
+## Check that obj2yaml reports an error when the sh_info field of a
+## group section contains an invalid (too large) signature symbol index.
+
+# RUN: yaml2obj --docnum=2 %s > %t2.o
+# RUN: not obj2yaml %t2.o 2>&1 | FileCheck %s --check-prefix ERR
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .group
+    Type: SHT_GROUP
+    Link: .symtab
+    Info: 0xFF
+    Members:
+      - SectionOrType: GRP_COMDAT
+
+# ERR: Error reading file: {{.*}}2.o: unable to get symbol from section [index 2]: invalid symbol index (255)
diff --git a/llvm/test/tools/obj2yaml/special-symbol-indices.yaml b/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
index 25550c944f385..fcc2a705f9c75 100644
--- a/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
+++ b/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
@@ -51,4 +51,4 @@ Symbols:
 ## shn_xindex.o contains a symbol with st_shndx == SHN_XINDEX.
 ## We do not support it at this moment.
 # RUN: not obj2yaml %S/Inputs/shn_xindex.o 2>&1 | FileCheck %s --check-prefix=ERR
-# ERR: Error reading file: {{.*}}shn_xindex.o: Feature not yet implemented.
+# ERR: Error reading file: {{.*}}shn_xindex.o: SHN_XINDEX symbols are not supported
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index 7404bae2a08ac..bd27c103403eb 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -44,31 +44,31 @@ class ELFDumper {
   const object::ELFFile<ELFT> &Obj;
   ArrayRef<Elf_Word> ShndxTable;
 
-  std::error_code dumpSymbols(const Elf_Shdr *Symtab,
-                              std::vector<ELFYAML::Symbol> &Symbols);
-  std::error_code dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
-                             StringRef StrTable, ELFYAML::Symbol &S);
-  std::error_code dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S);
-  std::error_code dumpCommonRelocationSection(const Elf_Shdr *Shdr,
-                                              ELFYAML::RelocationSection &S);
+  Error dumpSymbols(const Elf_Shdr *Symtab,
+                    std::vector<ELFYAML::Symbol> &Symbols);
+  Error dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
+                   StringRef StrTable, ELFYAML::Symbol &S);
+  Error dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S);
+  Error dumpCommonRelocationSection(const Elf_Shdr *Shdr,
+                                    ELFYAML::RelocationSection &S);
   template <class RelT>
-  std::error_code dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
-                                 ELFYAML::Relocation &R);
-  
-  ErrorOr<ELFYAML::DynamicSection *> dumpDynamicSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::RelocationSection *> dumpRelocSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::RawContentSection *>
+  Error dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
+                       ELFYAML::Relocation &R);
+
+  Expected<ELFYAML::DynamicSection *> dumpDynamicSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::RelocationSection *> dumpRelocSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::RawContentSection *>
   dumpContentSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::NoBitsSection *> dumpNoBitsSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::VerdefSection *> dumpVerdefSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::SymverSection *> dumpSymverSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::VerneedSection *> dumpVerneedSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::Group *> dumpGroup(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::MipsABIFlags *> dumpMipsABIFlags(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::NoBitsSection *> dumpNoBitsSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::VerdefSection *> dumpVerdefSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::SymverSection *> dumpSymverSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::VerneedSection *> dumpVerneedSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::Group *> dumpGroup(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::MipsABIFlags *> dumpMipsABIFlags(const Elf_Shdr *Shdr);
 
 public:
   ELFDumper(const object::ELFFile<ELFT> &O);
-  ErrorOr<ELFYAML::Object *> dump();
+  Expected<ELFYAML::Object *> dump();
 };
 
 }
@@ -134,7 +134,7 @@ ELFDumper<ELFT>::getUniquedSymbolName(const Elf_Sym *Sym, StringRef StrTable,
   return Name;
 }
 
-template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
+template <class ELFT> Expected<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   auto Y = make_unique<ELFYAML::Object>();
 
   // Dump header. We do not dump SHEntSize, SHOffset, SHNum and SHStrNdx field.
@@ -152,7 +152,7 @@ template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   // Dump sections
   auto SectionsOrErr = Obj.sections();
   if (!SectionsOrErr)
-    return errorToErrorCode(SectionsOrErr.takeError());
+    return SectionsOrErr.takeError();
   Sections = *SectionsOrErr;
   SectionNames.resize(Sections.size());
 
@@ -160,20 +160,20 @@ template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   // to access the deduplicated symbol names that we also create here.
   for (const Elf_Shdr &Sec : Sections) {
     if (Sec.sh_type == ELF::SHT_SYMTAB)
-      if (auto EC = dumpSymbols(&Sec, Y->Symbols))
-        return EC;
+      if (Error E = dumpSymbols(&Sec, Y->Symbols))
+        return std::move(E);
     if (Sec.sh_type == ELF::SHT_DYNSYM)
-      if (auto EC = dumpSymbols(&Sec, Y->DynamicSymbols))
-        return EC;
+      if (Error E = dumpSymbols(&Sec, Y->DynamicSymbols))
+        return std::move(E);
   }
 
   for (const Elf_Shdr &Sec : Sections) {
     switch (Sec.sh_type) {
     case ELF::SHT_DYNAMIC: {
-      ErrorOr<ELFYAML::DynamicSection *> S = dumpDynamicSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::DynamicSection *> SecOrErr = dumpDynamicSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_NULL:
@@ -185,65 +185,66 @@ template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
     case ELF::SHT_SYMTAB_SHNDX: {
       auto TableOrErr = Obj.getSHNDXTable(Sec);
       if (!TableOrErr)
-        return errorToErrorCode(TableOrErr.takeError());
+        return TableOrErr.takeError();
       ShndxTable = *TableOrErr;
       break;
     }
     case ELF::SHT_REL:
     case ELF::SHT_RELA: {
-      ErrorOr<ELFYAML::RelocationSection *> S = dumpRelocSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::RelocationSection *> SecOrErr = dumpRelocSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_GROUP: {
-      ErrorOr<ELFYAML::Group *> G = dumpGroup(&Sec);
-      if (std::error_code EC = G.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(G.get()));
+      Expected<ELFYAML::Group *> GroupOrErr = dumpGroup(&Sec);
+      if (!GroupOrErr)
+        return GroupOrErr.takeError();
+      Y->Sections.emplace_back(*GroupOrErr);
       break;
     }
     case ELF::SHT_MIPS_ABIFLAGS: {
-      ErrorOr<ELFYAML::MipsABIFlags *> G = dumpMipsABIFlags(&Sec);
-      if (std::error_code EC = G.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(G.get()));
+      Expected<ELFYAML::MipsABIFlags *> SecOrErr = dumpMipsABIFlags(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_NOBITS: {
-      ErrorOr<ELFYAML::NoBitsSection *> S = dumpNoBitsSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::NoBitsSection *> SecOrErr = dumpNoBitsSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_GNU_verdef: {
-      ErrorOr<ELFYAML::VerdefSection *> S = dumpVerdefSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::VerdefSection *> SecOrErr = dumpVerdefSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_GNU_versym: {
-      ErrorOr<ELFYAML::SymverSection *> S = dumpSymverSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::SymverSection *> SecOrErr = dumpSymverSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_GNU_verneed: {
-      ErrorOr<ELFYAML::VerneedSection *> S = dumpVerneedSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::VerneedSection *> SecOrErr = dumpVerneedSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     default: {
-      ErrorOr<ELFYAML::RawContentSection *> S = dumpContentSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::RawContentSection *> SecOrErr =
+          dumpContentSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
     }
     }
   }
@@ -252,20 +253,19 @@ template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
 }
 
 template <class ELFT>
-std::error_code
-ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
+Error ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
                              std::vector<ELFYAML::Symbol> &Symbols) {
   if (!Symtab)
-    return std::error_code();
+    return Error::success();
 
   auto StrTableOrErr = Obj.getStringTableForSymtab(*Symtab);
   if (!StrTableOrErr)
-    return errorToErrorCode(StrTableOrErr.takeError());
+    return StrTableOrErr.takeError();
   StringRef StrTable = *StrTableOrErr;
 
   auto SymtabOrErr = Obj.symbols(Symtab);
   if (!SymtabOrErr)
-    return errorToErrorCode(SymtabOrErr.takeError());
+    return SymtabOrErr.takeError();
 
   if (Symtab->sh_type == ELF::SHT_SYMTAB) {
     SymTable = *SymtabOrErr;
@@ -279,13 +279,12 @@ ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
     Symbols.push_back(S);
   }
 
-  return std::error_code();
+  return Error::success();
 }
 
 template <class ELFT>
-std::error_code
-ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
-                            StringRef StrTable, ELFYAML::Symbol &S) {
+Error ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
+                                  StringRef StrTable, ELFYAML::Symbol &S) {
   S.Type = Sym->getType();
   S.Value = Sym->st_value;
   S.Size = Sym->st_size;
@@ -295,56 +294,56 @@ ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
   Expected<StringRef> SymbolNameOrErr =
       getUniquedSymbolName(Sym, StrTable, SymTab);
   if (!SymbolNameOrErr)
-    return errorToErrorCode(SymbolNameOrErr.takeError());
+    return SymbolNameOrErr.takeError();
   S.Name = SymbolNameOrErr.get();
 
   if (Sym->st_shndx >= ELF::SHN_LORESERVE) {
     if (Sym->st_shndx == ELF::SHN_XINDEX)
-      return obj2yaml_error::not_implemented;
+      return createStringError(obj2yaml_error::not_implemented,
+                               "SHN_XINDEX symbols are not supported");
     S.Index = (ELFYAML::ELF_SHN)Sym->st_shndx;
-    return obj2yaml_error::success;
+    return Error::success();
   }
 
   auto ShdrOrErr = Obj.getSection(Sym, SymTab, ShndxTable);
   if (!ShdrOrErr)
-    return errorToErrorCode(ShdrOrErr.takeError());
+    return ShdrOrErr.takeError();
   const Elf_Shdr *Shdr = *ShdrOrErr;
   if (!Shdr)
-    return obj2yaml_error::success;
+    return Error::success();
 
   auto NameOrErr = getUniquedSectionName(Shdr);
   if (!NameOrErr)
-    return errorToErrorCode(NameOrErr.takeError());
+    return NameOrErr.takeError();
   S.Section = NameOrErr.get();
 
-  return obj2yaml_error::success;
+  return Error::success();
 }
 
 template <class ELFT>
 template <class RelT>
-std::error_code ELFDumper<ELFT>::dumpRelocation(const RelT *Rel,
-                                                const Elf_Shdr *SymTab,
-                                                ELFYAML::Relocation &R) {
+Error ELFDumper<ELFT>::dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
+                                      ELFYAML::Relocation &R) {
   R.Type = Rel->getType(Obj.isMips64EL());
   R.Offset = Rel->r_offset;
   R.Addend = 0;
 
   auto SymOrErr = Obj.getRelocationSymbol(Rel, SymTab);
   if (!SymOrErr)
-    return errorToErrorCode(SymOrErr.takeError());
+    return SymOrErr.takeError();
   const Elf_Sym *Sym = *SymOrErr;
   auto StrTabSec = Obj.getSection(SymTab->sh_link);
   if (!StrTabSec)
-    return errorToErrorCode(StrTabSec.takeError());
+    return StrTabSec.takeError();
   auto StrTabOrErr = Obj.getStringTable(*StrTabSec);
   if (!StrTabOrErr)
-    return errorToErrorCode(StrTabOrErr.takeError());
+    return StrTabOrErr.takeError();
   StringRef StrTab = *StrTabOrErr;
 
   if (Sym) {
     Expected<StringRef> NameOrErr = getUniquedSymbolName(Sym, StrTab, SymTab);
     if (!NameOrErr)
-      return errorToErrorCode(NameOrErr.takeError());
+      return NameOrErr.takeError();
     R.Symbol = NameOrErr.get();
   } else {
     // We have some edge cases of relocations without a symbol associated,
@@ -354,12 +353,12 @@ std::error_code ELFDumper<ELFT>::dumpRelocation(const RelT *Rel,
     R.Symbol = "";
   }
 
-  return obj2yaml_error::success;
+  return Error::success();
 }
 
 template <class ELFT>
-std::error_code ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
-                                                   ELFYAML::Section &S) {
+Error ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
+                                         ELFYAML::Section &S) {
   // Dump fields. We do not dump the ShOffset field. When not explicitly
   // set, the value is set by yaml2obj automatically.
   S.Type = Shdr->sh_type;
@@ -372,51 +371,50 @@ std::error_code ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
 
   auto NameOrErr = getUniquedSectionName(Shdr);
   if (!NameOrErr)
-    return errorToErrorCode(NameOrErr.takeError());
+    return NameOrErr.takeError();
   S.Name = NameOrErr.get();
 
   if (Shdr->sh_link != ELF::SHN_UNDEF) {
     auto LinkSection = Obj.getSection(Shdr->sh_link);
     if (LinkSection.takeError())
-      return errorToErrorCode(LinkSection.takeError());
+      return LinkSection.takeError();
     NameOrErr = getUniquedSectionName(*LinkSection);
     if (!NameOrErr)
-      return errorToErrorCode(NameOrErr.takeError());
+      return NameOrErr.takeError();
     S.Link = NameOrErr.get();
   }
 
-  return obj2yaml_error::success;
+  return Error::success();
 }
 
 template <class ELFT>
-std::error_code
-ELFDumper<ELFT>::dumpCommonRelocationSection(const Elf_Shdr *Shdr,
-                                             ELFYAML::RelocationSection &S) {
-  if (std::error_code EC = dumpCommonSection(Shdr, S))
-    return EC;
+Error ELFDumper<ELFT>::dumpCommonRelocationSection(
+    const Elf_Shdr *Shdr, ELFYAML::RelocationSection &S) {
+  if (Error E = dumpCommonSection(Shdr, S))
+    return E;
 
   auto InfoSection = Obj.getSection(Shdr->sh_info);
   if (!InfoSection)
-    return errorToErrorCode(InfoSection.takeError());
+    return InfoSection.takeError();
 
   auto NameOrErr = getUniquedSectionName(*InfoSection);
   if (!NameOrErr)
-    return errorToErrorCode(NameOrErr.takeError());
+    return NameOrErr.takeError();
   S.RelocatableSec = NameOrErr.get();
 
-  return obj2yaml_error::success;
+  return Error::success();
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::DynamicSection *>
+Expected<ELFYAML::DynamicSection *>
 ELFDumper<ELFT>::dumpDynamicSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::DynamicSection>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto DynTagsOrErr = Obj.template getSectionContentsAsArray<Elf_Dyn>(Shdr);
   if (!DynTagsOrErr)
-    return errorToErrorCode(DynTagsOrErr.takeError());
+    return DynTagsOrErr.takeError();
 
   for (const Elf_Dyn &Dyn : *DynTagsOrErr)
     S->Entries.push_back({(ELFYAML::ELF_DYNTAG)Dyn.getTag(), Dyn.getVal()});
@@ -425,35 +423,35 @@ ELFDumper<ELFT>::dumpDynamicSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::RelocationSection *>
+Expected<ELFYAML::RelocationSection *>
 ELFDumper<ELFT>::dumpRelocSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::RelocationSection>();
-  if (std::error_code EC = dumpCommonRelocationSection(Shdr, *S))
-    return EC;
+  if (auto E = dumpCommonRelocationSection(Shdr, *S))
+    return std::move(E);
 
   auto SymTabOrErr = Obj.getSection(Shdr->sh_link);
   if (!SymTabOrErr)
-    return errorToErrorCode(SymTabOrErr.takeError());
+    return SymTabOrErr.takeError();
   const Elf_Shdr *SymTab = *SymTabOrErr;
 
   if (Shdr->sh_type == ELF::SHT_REL) {
     auto Rels = Obj.rels(Shdr);
     if (!Rels)
-      return errorToErrorCode(Rels.takeError());
+      return Rels.takeError();
     for (const Elf_Rel &Rel : *Rels) {
       ELFYAML::Relocation R;
-      if (std::error_code EC = dumpRelocation(&Rel, SymTab, R))
-        return EC;
+      if (Error E = dumpRelocation(&Rel, SymTab, R))
+        return std::move(E);
       S->Relocations.push_back(R);
     }
   } else {
     auto Rels = Obj.relas(Shdr);
     if (!Rels)
-      return errorToErrorCode(Rels.takeError());
+      return Rels.takeError();
     for (const Elf_Rela &Rel : *Rels) {
       ELFYAML::Relocation R;
-      if (std::error_code EC = dumpRelocation(&Rel, SymTab, R))
-        return EC;
+      if (Error E = dumpRelocation(&Rel, SymTab, R))
+        return std::move(E);
       R.Addend = Rel.r_addend;
       S->Relocations.push_back(R);
     }
@@ -463,16 +461,15 @@ ELFDumper<ELFT>::dumpRelocSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::RawContentSection *>
+Expected<ELFYAML::RawContentSection *>
 ELFDumper<ELFT>::dumpContentSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::RawContentSection>();
-
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto ContentOrErr = Obj.getSectionContents(Shdr);
   if (!ContentOrErr)
-    return errorToErrorCode(ContentOrErr.takeError());
+    return ContentOrErr.takeError();
   ArrayRef<uint8_t> Content = *ContentOrErr;
   if (!Content.empty())
     S->Content = yaml::BinaryRef(Content);
@@ -482,40 +479,39 @@ ELFDumper<ELFT>::dumpContentSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::NoBitsSection *>
+Expected<ELFYAML::NoBitsSection *>
 ELFDumper<ELFT>::dumpNoBitsSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::NoBitsSection>();
-
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
   S->Size = Shdr->sh_size;
 
   return S.release();
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::VerdefSection *>
+Expected<ELFYAML::VerdefSection *>
 ELFDumper<ELFT>::dumpVerdefSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Verdef Elf_Verdef;
   typedef typename ELFT::Verdaux Elf_Verdaux;
 
   auto S = make_unique<ELFYAML::VerdefSection>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   S->Info = Shdr->sh_info;
 
   auto StringTableShdrOrErr = Obj.getSection(Shdr->sh_link);
   if (!StringTableShdrOrErr)
-    return errorToErrorCode(StringTableShdrOrErr.takeError());
+    return StringTableShdrOrErr.takeError();
 
   auto StringTableOrErr = Obj.getStringTable(*StringTableShdrOrErr);
   if (!StringTableOrErr)
-    return errorToErrorCode(StringTableOrErr.takeError());
+    return StringTableOrErr.takeError();
 
   auto Contents = Obj.getSectionContents(Shdr);
   if (!Contents)
-    return errorToErrorCode(Contents.takeError());
+    return Contents.takeError();
 
   llvm::ArrayRef<uint8_t> Data = *Contents;
   const uint8_t *Buf = Data.data();
@@ -544,17 +540,17 @@ ELFDumper<ELFT>::dumpVerdefSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::SymverSection *>
+Expected<ELFYAML::SymverSection *>
 ELFDumper<ELFT>::dumpSymverSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Half Elf_Half;
 
   auto S = make_unique<ELFYAML::SymverSection>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto VersionsOrErr = Obj.template getSectionContentsAsArray<Elf_Half>(Shdr);
   if (!VersionsOrErr)
-    return errorToErrorCode(VersionsOrErr.takeError());
+    return VersionsOrErr.takeError();
   for (const Elf_Half &E : *VersionsOrErr)
     S->Entries.push_back(E);
 
@@ -562,28 +558,28 @@ ELFDumper<ELFT>::dumpSymverSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::VerneedSection *>
+Expected<ELFYAML::VerneedSection *>
 ELFDumper<ELFT>::dumpVerneedSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Verneed Elf_Verneed;
   typedef typename ELFT::Vernaux Elf_Vernaux;
 
   auto S = make_unique<ELFYAML::VerneedSection>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   S->Info = Shdr->sh_info;
 
   auto Contents = Obj.getSectionContents(Shdr);
   if (!Contents)
-    return errorToErrorCode(Contents.takeError());
+    return Contents.takeError();
 
   auto StringTableShdrOrErr = Obj.getSection(Shdr->sh_link);
   if (!StringTableShdrOrErr)
-    return errorToErrorCode(StringTableShdrOrErr.takeError());
+    return StringTableShdrOrErr.takeError();
 
   auto StringTableOrErr = Obj.getStringTable(*StringTableShdrOrErr);
   if (!StringTableOrErr)
-    return errorToErrorCode(StringTableOrErr.takeError());
+    return StringTableOrErr.takeError();
 
   llvm::ArrayRef<uint8_t> Data = *Contents;
   const uint8_t *Buf = Data.data();
@@ -619,32 +615,32 @@ ELFDumper<ELFT>::dumpVerneedSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
+Expected<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::Group>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto SymtabOrErr = Obj.getSection(Shdr->sh_link);
   if (!SymtabOrErr)
-    return errorToErrorCode(SymtabOrErr.takeError());
+    return SymtabOrErr.takeError();
   // Get symbol with index sh_info which name is the signature of the group.
   const Elf_Shdr *Symtab = *SymtabOrErr;
   auto SymOrErr = Obj.getSymbol(Symtab, Shdr->sh_info);
   if (!SymOrErr)
-    return errorToErrorCode(SymOrErr.takeError());
+    return SymOrErr.takeError();
   auto StrTabOrErr = Obj.getStringTableForSymtab(*Symtab);
   if (!StrTabOrErr)
-    return errorToErrorCode(StrTabOrErr.takeError());
+    return StrTabOrErr.takeError();
 
   Expected<StringRef> SymbolName =
       getUniquedSymbolName(*SymOrErr, *StrTabOrErr, Symtab);
   if (!SymbolName)
-    return errorToErrorCode(SymbolName.takeError());
+    return SymbolName.takeError();
   S->Signature = *SymbolName;
 
   auto MembersOrErr = Obj.template getSectionContentsAsArray<Elf_Word>(Shdr);
   if (!MembersOrErr)
-    return errorToErrorCode(MembersOrErr.takeError());
+    return MembersOrErr.takeError();
 
   for (Elf_Word Member : *MembersOrErr) {
     if (Member == llvm::ELF::GRP_COMDAT) {
@@ -654,27 +650,27 @@ ErrorOr<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
 
     auto SHdrOrErr = Obj.getSection(Member);
     if (!SHdrOrErr)
-      return errorToErrorCode(SHdrOrErr.takeError());
+      return SHdrOrErr.takeError();
     auto NameOrErr = getUniquedSectionName(*SHdrOrErr);
     if (!NameOrErr)
-      return errorToErrorCode(NameOrErr.takeError());
+      return NameOrErr.takeError();
     S->Members.push_back({*NameOrErr});
   }
   return S.release();
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::MipsABIFlags *>
+Expected<ELFYAML::MipsABIFlags *>
 ELFDumper<ELFT>::dumpMipsABIFlags(const Elf_Shdr *Shdr) {
   assert(Shdr->sh_type == ELF::SHT_MIPS_ABIFLAGS &&
          "Section type is not SHT_MIPS_ABIFLAGS");
   auto S = make_unique<ELFYAML::MipsABIFlags>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto ContentOrErr = Obj.getSectionContents(Shdr);
   if (!ContentOrErr)
-    return errorToErrorCode(ContentOrErr.takeError());
+    return ContentOrErr.takeError();
 
   auto *Flags = reinterpret_cast<const object::Elf_Mips_ABIFlags<ELFT> *>(
       ContentOrErr.get().data());
@@ -693,21 +689,20 @@ ELFDumper<ELFT>::dumpMipsABIFlags(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-static std::error_code elf2yaml(raw_ostream &Out,
-                                const object::ELFFile<ELFT> &Obj) {
+static Error elf2yaml(raw_ostream &Out, const object::ELFFile<ELFT> &Obj) {
   ELFDumper<ELFT> Dumper(Obj);
-  ErrorOr<ELFYAML::Object *> YAMLOrErr = Dumper.dump();
-  if (std::error_code EC = YAMLOrErr.getError())
-    return EC;
+  Expected<ELFYAML::Object *> YAMLOrErr = Dumper.dump();
+  if (!YAMLOrErr)
+    return YAMLOrErr.takeError();
 
   std::unique_ptr<ELFYAML::Object> YAML(YAMLOrErr.get());
   yaml::Output Yout(Out);
   Yout << *YAML;
 
-  return std::error_code();
+  return Error::success();
 }
 
-std::error_code elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
+Error elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
   if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
     return elf2yaml(Out, *ELFObj->getELFFile());
 
@@ -720,5 +715,5 @@ std::error_code elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
   if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
     return elf2yaml(Out, *ELFObj->getELFFile());
 
-  return obj2yaml_error::unsupported_obj_file_format;
+  llvm_unreachable("unknown ELF file format");
 }
diff --git a/llvm/tools/obj2yaml/obj2yaml.cpp b/llvm/tools/obj2yaml/obj2yaml.cpp
index 8622e38319b6c..f03b1ef4bade3 100644
--- a/llvm/tools/obj2yaml/obj2yaml.cpp
+++ b/llvm/tools/obj2yaml/obj2yaml.cpp
@@ -17,19 +17,20 @@
 using namespace llvm;
 using namespace llvm::object;
 
-static std::error_code dumpObject(const ObjectFile &Obj) {
+static Error dumpObject(const ObjectFile &Obj) {
   if (Obj.isCOFF())
-    return coff2yaml(outs(), cast<COFFObjectFile>(Obj));
+    return errorCodeToError(coff2yaml(outs(), cast<COFFObjectFile>(Obj)));
 
   if (Obj.isXCOFF())
-    return xcoff2yaml(outs(), cast<XCOFFObjectFile>(Obj));
+    return errorCodeToError(xcoff2yaml(outs(), cast<XCOFFObjectFile>(Obj)));
 
   if (Obj.isELF())
     return elf2yaml(outs(), Obj);
+
   if (Obj.isWasm())
-    return wasm2yaml(outs(), cast<WasmObjectFile>(Obj));
+    return errorCodeToError(wasm2yaml(outs(), cast<WasmObjectFile>(Obj)));
 
-  return obj2yaml_error::unsupported_obj_file_format;
+  return errorCodeToError(obj2yaml_error::unsupported_obj_file_format);
 }
 
 static Error dumpInput(StringRef File) {
@@ -44,7 +45,7 @@ static Error dumpInput(StringRef File) {
     return errorCodeToError(macho2yaml(outs(), Binary));
   // TODO: If this is an archive, then burst it and dump each entry
   if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
-    return errorCodeToError(dumpObject(*Obj));
+    return dumpObject(*Obj);
   if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary))
     return minidump2yaml(outs(), *Minidump);
 
diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h
index b40e2c5c5a640..4f4a5330429ff 100644
--- a/llvm/tools/obj2yaml/obj2yaml.h
+++ b/llvm/tools/obj2yaml/obj2yaml.h
@@ -21,7 +21,7 @@
 
 std::error_code coff2yaml(llvm::raw_ostream &Out,
                           const llvm::object::COFFObjectFile &Obj);
-std::error_code elf2yaml(llvm::raw_ostream &Out,
+llvm::Error elf2yaml(llvm::raw_ostream &Out,
                          const llvm::object::ObjectFile &Obj);
 std::error_code macho2yaml(llvm::raw_ostream &Out,
                            const llvm::object::Binary &Obj);

From b91403d46701c4fb64b213533e3e62e5e5b5ec1d Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Mon, 15 Jul 2019 11:00:42 +0000
Subject: [PATCH 089/451] Revert r366052 "[obj2yaml] - Rework tool's error
 reporting logic for ELF target."

It seems this broke LLD:
http://lab.llvm.org:8011/builders/sanitizer-windows/builds/48434

llvm-svn: 366053
---
 llvm/include/llvm/Object/ELF.h                |  23 +-
 llvm/test/tools/obj2yaml/section-group.test   |  24 +-
 .../obj2yaml/special-symbol-indices.yaml      |   2 +-
 llvm/tools/obj2yaml/elf2yaml.cpp              | 319 +++++++++---------
 llvm/tools/obj2yaml/obj2yaml.cpp              |  13 +-
 llvm/tools/obj2yaml/obj2yaml.h                |   2 +-
 6 files changed, 184 insertions(+), 199 deletions(-)

diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 7bc6dc4620c7d..8fe9f2919c5b4 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -355,20 +355,23 @@ ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols,
 }
 
 template <class ELFT>
-Expected<const typename ELFT::Sym *>
-ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
-  auto SymsOrErr = symbols(Sec);
-  if (!SymsOrErr)
-    return SymsOrErr.takeError();
-
-  Elf_Sym_Range Symbols = *SymsOrErr;
+inline Expected<const typename ELFT::Sym *>
+getSymbol(typename ELFT::SymRange Symbols, uint32_t Index) {
   if (Index >= Symbols.size())
-    return createError("unable to get symbol from section " +
-                       getSecIndexForError(this, Sec) +
-                       ": invalid symbol index (" + Twine(Index) + ")");
+    // TODO: this error is untested.
+    return createError("invalid symbol index");
   return &Symbols[Index];
 }
 
+template <class ELFT>
+Expected<const typename ELFT::Sym *>
+ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
+  auto SymtabOrErr = symbols(Sec);
+  if (!SymtabOrErr)
+    return SymtabOrErr.takeError();
+  return object::getSymbol<ELFT>(*SymtabOrErr, Index);
+}
+
 template <class ELFT>
 template <typename T>
 Expected<ArrayRef<T>>
diff --git a/llvm/test/tools/obj2yaml/section-group.test b/llvm/test/tools/obj2yaml/section-group.test
index cd520cb1b361f..78af00cd138b3 100644
--- a/llvm/test/tools/obj2yaml/section-group.test
+++ b/llvm/test/tools/obj2yaml/section-group.test
@@ -1,6 +1,6 @@
 ## Checks that the tool is able to read section groups from ELF.
 
-# RUN: yaml2obj --docnum=1 %s > %t1.o
+# RUN: yaml2obj %s > %t1.o
 # RUN: llvm-readobj --elf-section-groups %t1.o | FileCheck %s -check-prefix=OBJ
 # RUN: obj2yaml %t1.o | FileCheck %s --check-prefix YAML
 
@@ -46,25 +46,3 @@ Symbols:
   - Name:    signature
     Type:    STT_OBJECT
     Section: .rodata
-
-## Check obj2yaml report an error when sh_info field of
-## group section contains invalid (too large) signature symbol index.
-
-# RUN: yaml2obj --docnum=2 %s > %t2.o
-# RUN: not obj2yaml %t2.o 2>&1 | FileCheck %s --check-prefix ERR
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .group
-    Type: SHT_GROUP
-    Link: .symtab
-    Info: 0xFF
-    Members:
-      - SectionOrType: GRP_COMDAT
-
-# ERR: Error reading file: {{.*}}2.o: unable to get symbol from section [index 2]: invalid symbol index (255)
diff --git a/llvm/test/tools/obj2yaml/special-symbol-indices.yaml b/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
index fcc2a705f9c75..25550c944f385 100644
--- a/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
+++ b/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
@@ -51,4 +51,4 @@ Symbols:
 ## shn_xindex.o contains a symbol with st_shndx == SHN_XINDEX.
 ## We do not support it at this moment.
 # RUN: not obj2yaml %S/Inputs/shn_xindex.o 2>&1 | FileCheck %s --check-prefix=ERR
-# ERR: Error reading file: {{.*}}shn_xindex.o: SHN_XINDEX symbols are not supported
+# ERR: Error reading file: {{.*}}shn_xindex.o: Feature not yet implemented.
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index bd27c103403eb..7404bae2a08ac 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -44,31 +44,31 @@ class ELFDumper {
   const object::ELFFile<ELFT> &Obj;
   ArrayRef<Elf_Word> ShndxTable;
 
-  Error dumpSymbols(const Elf_Shdr *Symtab,
-                    std::vector<ELFYAML::Symbol> &Symbols);
-  Error dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
-                   StringRef StrTable, ELFYAML::Symbol &S);
-  Error dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S);
-  Error dumpCommonRelocationSection(const Elf_Shdr *Shdr,
-                                    ELFYAML::RelocationSection &S);
+  std::error_code dumpSymbols(const Elf_Shdr *Symtab,
+                              std::vector<ELFYAML::Symbol> &Symbols);
+  std::error_code dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
+                             StringRef StrTable, ELFYAML::Symbol &S);
+  std::error_code dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S);
+  std::error_code dumpCommonRelocationSection(const Elf_Shdr *Shdr,
+                                              ELFYAML::RelocationSection &S);
   template <class RelT>
-  Error dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
-                       ELFYAML::Relocation &R);
-
-  Expected<ELFYAML::DynamicSection *> dumpDynamicSection(const Elf_Shdr *Shdr);
-  Expected<ELFYAML::RelocationSection *> dumpRelocSection(const Elf_Shdr *Shdr);
-  Expected<ELFYAML::RawContentSection *>
+  std::error_code dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
+                                 ELFYAML::Relocation &R);
+  
+  ErrorOr<ELFYAML::DynamicSection *> dumpDynamicSection(const Elf_Shdr *Shdr);
+  ErrorOr<ELFYAML::RelocationSection *> dumpRelocSection(const Elf_Shdr *Shdr);
+  ErrorOr<ELFYAML::RawContentSection *>
   dumpContentSection(const Elf_Shdr *Shdr);
-  Expected<ELFYAML::NoBitsSection *> dumpNoBitsSection(const Elf_Shdr *Shdr);
-  Expected<ELFYAML::VerdefSection *> dumpVerdefSection(const Elf_Shdr *Shdr);
-  Expected<ELFYAML::SymverSection *> dumpSymverSection(const Elf_Shdr *Shdr);
-  Expected<ELFYAML::VerneedSection *> dumpVerneedSection(const Elf_Shdr *Shdr);
-  Expected<ELFYAML::Group *> dumpGroup(const Elf_Shdr *Shdr);
-  Expected<ELFYAML::MipsABIFlags *> dumpMipsABIFlags(const Elf_Shdr *Shdr);
+  ErrorOr<ELFYAML::NoBitsSection *> dumpNoBitsSection(const Elf_Shdr *Shdr);
+  ErrorOr<ELFYAML::VerdefSection *> dumpVerdefSection(const Elf_Shdr *Shdr);
+  ErrorOr<ELFYAML::SymverSection *> dumpSymverSection(const Elf_Shdr *Shdr);
+  ErrorOr<ELFYAML::VerneedSection *> dumpVerneedSection(const Elf_Shdr *Shdr);
+  ErrorOr<ELFYAML::Group *> dumpGroup(const Elf_Shdr *Shdr);
+  ErrorOr<ELFYAML::MipsABIFlags *> dumpMipsABIFlags(const Elf_Shdr *Shdr);
 
 public:
   ELFDumper(const object::ELFFile<ELFT> &O);
-  Expected<ELFYAML::Object *> dump();
+  ErrorOr<ELFYAML::Object *> dump();
 };
 
 }
@@ -134,7 +134,7 @@ ELFDumper<ELFT>::getUniquedSymbolName(const Elf_Sym *Sym, StringRef StrTable,
   return Name;
 }
 
-template <class ELFT> Expected<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
+template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   auto Y = make_unique<ELFYAML::Object>();
 
   // Dump header. We do not dump SHEntSize, SHOffset, SHNum and SHStrNdx field.
@@ -152,7 +152,7 @@ template <class ELFT> Expected<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   // Dump sections
   auto SectionsOrErr = Obj.sections();
   if (!SectionsOrErr)
-    return SectionsOrErr.takeError();
+    return errorToErrorCode(SectionsOrErr.takeError());
   Sections = *SectionsOrErr;
   SectionNames.resize(Sections.size());
 
@@ -160,20 +160,20 @@ template <class ELFT> Expected<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   // to access the deduplicated symbol names that we also create here.
   for (const Elf_Shdr &Sec : Sections) {
     if (Sec.sh_type == ELF::SHT_SYMTAB)
-      if (Error E = dumpSymbols(&Sec, Y->Symbols))
-        return std::move(E);
+      if (auto EC = dumpSymbols(&Sec, Y->Symbols))
+        return EC;
     if (Sec.sh_type == ELF::SHT_DYNSYM)
-      if (Error E = dumpSymbols(&Sec, Y->DynamicSymbols))
-        return std::move(E);
+      if (auto EC = dumpSymbols(&Sec, Y->DynamicSymbols))
+        return EC;
   }
 
   for (const Elf_Shdr &Sec : Sections) {
     switch (Sec.sh_type) {
     case ELF::SHT_DYNAMIC: {
-      Expected<ELFYAML::DynamicSection *> SecOrErr = dumpDynamicSection(&Sec);
-      if (!SecOrErr)
-        return SecOrErr.takeError();
-      Y->Sections.emplace_back(*SecOrErr);
+      ErrorOr<ELFYAML::DynamicSection *> S = dumpDynamicSection(&Sec);
+      if (std::error_code EC = S.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
       break;
     }
     case ELF::SHT_NULL:
@@ -185,66 +185,65 @@ template <class ELFT> Expected<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
     case ELF::SHT_SYMTAB_SHNDX: {
       auto TableOrErr = Obj.getSHNDXTable(Sec);
       if (!TableOrErr)
-        return TableOrErr.takeError();
+        return errorToErrorCode(TableOrErr.takeError());
       ShndxTable = *TableOrErr;
       break;
     }
     case ELF::SHT_REL:
     case ELF::SHT_RELA: {
-      Expected<ELFYAML::RelocationSection *> SecOrErr = dumpRelocSection(&Sec);
-      if (!SecOrErr)
-        return SecOrErr.takeError();
-      Y->Sections.emplace_back(*SecOrErr);
+      ErrorOr<ELFYAML::RelocationSection *> S = dumpRelocSection(&Sec);
+      if (std::error_code EC = S.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
       break;
     }
     case ELF::SHT_GROUP: {
-      Expected<ELFYAML::Group *> GroupOrErr = dumpGroup(&Sec);
-      if (!GroupOrErr)
-        return GroupOrErr.takeError();
-      Y->Sections.emplace_back(*GroupOrErr);
+      ErrorOr<ELFYAML::Group *> G = dumpGroup(&Sec);
+      if (std::error_code EC = G.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(G.get()));
       break;
     }
     case ELF::SHT_MIPS_ABIFLAGS: {
-      Expected<ELFYAML::MipsABIFlags *> SecOrErr = dumpMipsABIFlags(&Sec);
-      if (!SecOrErr)
-        return SecOrErr.takeError();
-      Y->Sections.emplace_back(*SecOrErr);
+      ErrorOr<ELFYAML::MipsABIFlags *> G = dumpMipsABIFlags(&Sec);
+      if (std::error_code EC = G.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(G.get()));
       break;
     }
     case ELF::SHT_NOBITS: {
-      Expected<ELFYAML::NoBitsSection *> SecOrErr = dumpNoBitsSection(&Sec);
-      if (!SecOrErr)
-        return SecOrErr.takeError();
-      Y->Sections.emplace_back(*SecOrErr);
+      ErrorOr<ELFYAML::NoBitsSection *> S = dumpNoBitsSection(&Sec);
+      if (std::error_code EC = S.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
       break;
     }
     case ELF::SHT_GNU_verdef: {
-      Expected<ELFYAML::VerdefSection *> SecOrErr = dumpVerdefSection(&Sec);
-      if (!SecOrErr)
-        return SecOrErr.takeError();
-      Y->Sections.emplace_back(*SecOrErr);
+      ErrorOr<ELFYAML::VerdefSection *> S = dumpVerdefSection(&Sec);
+      if (std::error_code EC = S.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
       break;
     }
     case ELF::SHT_GNU_versym: {
-      Expected<ELFYAML::SymverSection *> SecOrErr = dumpSymverSection(&Sec);
-      if (!SecOrErr)
-        return SecOrErr.takeError();
-      Y->Sections.emplace_back(*SecOrErr);
+      ErrorOr<ELFYAML::SymverSection *> S = dumpSymverSection(&Sec);
+      if (std::error_code EC = S.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
       break;
     }
     case ELF::SHT_GNU_verneed: {
-      Expected<ELFYAML::VerneedSection *> SecOrErr = dumpVerneedSection(&Sec);
-      if (!SecOrErr)
-        return SecOrErr.takeError();
-      Y->Sections.emplace_back(*SecOrErr);
+      ErrorOr<ELFYAML::VerneedSection *> S = dumpVerneedSection(&Sec);
+      if (std::error_code EC = S.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
       break;
     }
     default: {
-      Expected<ELFYAML::RawContentSection *> SecOrErr =
-          dumpContentSection(&Sec);
-      if (!SecOrErr)
-        return SecOrErr.takeError();
-      Y->Sections.emplace_back(*SecOrErr);
+      ErrorOr<ELFYAML::RawContentSection *> S = dumpContentSection(&Sec);
+      if (std::error_code EC = S.getError())
+        return EC;
+      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
     }
     }
   }
@@ -253,19 +252,20 @@ template <class ELFT> Expected<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
 }
 
 template <class ELFT>
-Error ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
+std::error_code
+ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
                              std::vector<ELFYAML::Symbol> &Symbols) {
   if (!Symtab)
-    return Error::success();
+    return std::error_code();
 
   auto StrTableOrErr = Obj.getStringTableForSymtab(*Symtab);
   if (!StrTableOrErr)
-    return StrTableOrErr.takeError();
+    return errorToErrorCode(StrTableOrErr.takeError());
   StringRef StrTable = *StrTableOrErr;
 
   auto SymtabOrErr = Obj.symbols(Symtab);
   if (!SymtabOrErr)
-    return SymtabOrErr.takeError();
+    return errorToErrorCode(SymtabOrErr.takeError());
 
   if (Symtab->sh_type == ELF::SHT_SYMTAB) {
     SymTable = *SymtabOrErr;
@@ -279,12 +279,13 @@ Error ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
     Symbols.push_back(S);
   }
 
-  return Error::success();
+  return std::error_code();
 }
 
 template <class ELFT>
-Error ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
-                                  StringRef StrTable, ELFYAML::Symbol &S) {
+std::error_code
+ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
+                            StringRef StrTable, ELFYAML::Symbol &S) {
   S.Type = Sym->getType();
   S.Value = Sym->st_value;
   S.Size = Sym->st_size;
@@ -294,56 +295,56 @@ Error ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
   Expected<StringRef> SymbolNameOrErr =
       getUniquedSymbolName(Sym, StrTable, SymTab);
   if (!SymbolNameOrErr)
-    return SymbolNameOrErr.takeError();
+    return errorToErrorCode(SymbolNameOrErr.takeError());
   S.Name = SymbolNameOrErr.get();
 
   if (Sym->st_shndx >= ELF::SHN_LORESERVE) {
     if (Sym->st_shndx == ELF::SHN_XINDEX)
-      return createStringError(obj2yaml_error::not_implemented,
-                               "SHN_XINDEX symbols are not supported");
+      return obj2yaml_error::not_implemented;
     S.Index = (ELFYAML::ELF_SHN)Sym->st_shndx;
-    return Error::success();
+    return obj2yaml_error::success;
   }
 
   auto ShdrOrErr = Obj.getSection(Sym, SymTab, ShndxTable);
   if (!ShdrOrErr)
-    return ShdrOrErr.takeError();
+    return errorToErrorCode(ShdrOrErr.takeError());
   const Elf_Shdr *Shdr = *ShdrOrErr;
   if (!Shdr)
-    return Error::success();
+    return obj2yaml_error::success;
 
   auto NameOrErr = getUniquedSectionName(Shdr);
   if (!NameOrErr)
-    return NameOrErr.takeError();
+    return errorToErrorCode(NameOrErr.takeError());
   S.Section = NameOrErr.get();
 
-  return Error::success();
+  return obj2yaml_error::success;
 }
 
 template <class ELFT>
 template <class RelT>
-Error ELFDumper<ELFT>::dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
-                                      ELFYAML::Relocation &R) {
+std::error_code ELFDumper<ELFT>::dumpRelocation(const RelT *Rel,
+                                                const Elf_Shdr *SymTab,
+                                                ELFYAML::Relocation &R) {
   R.Type = Rel->getType(Obj.isMips64EL());
   R.Offset = Rel->r_offset;
   R.Addend = 0;
 
   auto SymOrErr = Obj.getRelocationSymbol(Rel, SymTab);
   if (!SymOrErr)
-    return SymOrErr.takeError();
+    return errorToErrorCode(SymOrErr.takeError());
   const Elf_Sym *Sym = *SymOrErr;
   auto StrTabSec = Obj.getSection(SymTab->sh_link);
   if (!StrTabSec)
-    return StrTabSec.takeError();
+    return errorToErrorCode(StrTabSec.takeError());
   auto StrTabOrErr = Obj.getStringTable(*StrTabSec);
   if (!StrTabOrErr)
-    return StrTabOrErr.takeError();
+    return errorToErrorCode(StrTabOrErr.takeError());
   StringRef StrTab = *StrTabOrErr;
 
   if (Sym) {
     Expected<StringRef> NameOrErr = getUniquedSymbolName(Sym, StrTab, SymTab);
     if (!NameOrErr)
-      return NameOrErr.takeError();
+      return errorToErrorCode(NameOrErr.takeError());
     R.Symbol = NameOrErr.get();
   } else {
     // We have some edge cases of relocations without a symbol associated,
@@ -353,12 +354,12 @@ Error ELFDumper<ELFT>::dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
     R.Symbol = "";
   }
 
-  return Error::success();
+  return obj2yaml_error::success;
 }
 
 template <class ELFT>
-Error ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
-                                         ELFYAML::Section &S) {
+std::error_code ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
+                                                   ELFYAML::Section &S) {
   // Dump fields. We do not dump the ShOffset field. When not explicitly
   // set, the value is set by yaml2obj automatically.
   S.Type = Shdr->sh_type;
@@ -371,50 +372,51 @@ Error ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
 
   auto NameOrErr = getUniquedSectionName(Shdr);
   if (!NameOrErr)
-    return NameOrErr.takeError();
+    return errorToErrorCode(NameOrErr.takeError());
   S.Name = NameOrErr.get();
 
   if (Shdr->sh_link != ELF::SHN_UNDEF) {
     auto LinkSection = Obj.getSection(Shdr->sh_link);
     if (LinkSection.takeError())
-      return LinkSection.takeError();
+      return errorToErrorCode(LinkSection.takeError());
     NameOrErr = getUniquedSectionName(*LinkSection);
     if (!NameOrErr)
-      return NameOrErr.takeError();
+      return errorToErrorCode(NameOrErr.takeError());
     S.Link = NameOrErr.get();
   }
 
-  return Error::success();
+  return obj2yaml_error::success;
 }
 
 template <class ELFT>
-Error ELFDumper<ELFT>::dumpCommonRelocationSection(
-    const Elf_Shdr *Shdr, ELFYAML::RelocationSection &S) {
-  if (Error E = dumpCommonSection(Shdr, S))
-    return E;
+std::error_code
+ELFDumper<ELFT>::dumpCommonRelocationSection(const Elf_Shdr *Shdr,
+                                             ELFYAML::RelocationSection &S) {
+  if (std::error_code EC = dumpCommonSection(Shdr, S))
+    return EC;
 
   auto InfoSection = Obj.getSection(Shdr->sh_info);
   if (!InfoSection)
-    return InfoSection.takeError();
+    return errorToErrorCode(InfoSection.takeError());
 
   auto NameOrErr = getUniquedSectionName(*InfoSection);
   if (!NameOrErr)
-    return NameOrErr.takeError();
+    return errorToErrorCode(NameOrErr.takeError());
   S.RelocatableSec = NameOrErr.get();
 
-  return Error::success();
+  return obj2yaml_error::success;
 }
 
 template <class ELFT>
-Expected<ELFYAML::DynamicSection *>
+ErrorOr<ELFYAML::DynamicSection *>
 ELFDumper<ELFT>::dumpDynamicSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::DynamicSection>();
-  if (Error E = dumpCommonSection(Shdr, *S))
-    return std::move(E);
+  if (std::error_code EC = dumpCommonSection(Shdr, *S))
+    return EC;
 
   auto DynTagsOrErr = Obj.template getSectionContentsAsArray<Elf_Dyn>(Shdr);
   if (!DynTagsOrErr)
-    return DynTagsOrErr.takeError();
+    return errorToErrorCode(DynTagsOrErr.takeError());
 
   for (const Elf_Dyn &Dyn : *DynTagsOrErr)
     S->Entries.push_back({(ELFYAML::ELF_DYNTAG)Dyn.getTag(), Dyn.getVal()});
@@ -423,35 +425,35 @@ ELFDumper<ELFT>::dumpDynamicSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-Expected<ELFYAML::RelocationSection *>
+ErrorOr<ELFYAML::RelocationSection *>
 ELFDumper<ELFT>::dumpRelocSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::RelocationSection>();
-  if (auto E = dumpCommonRelocationSection(Shdr, *S))
-    return std::move(E);
+  if (std::error_code EC = dumpCommonRelocationSection(Shdr, *S))
+    return EC;
 
   auto SymTabOrErr = Obj.getSection(Shdr->sh_link);
   if (!SymTabOrErr)
-    return SymTabOrErr.takeError();
+    return errorToErrorCode(SymTabOrErr.takeError());
   const Elf_Shdr *SymTab = *SymTabOrErr;
 
   if (Shdr->sh_type == ELF::SHT_REL) {
     auto Rels = Obj.rels(Shdr);
     if (!Rels)
-      return Rels.takeError();
+      return errorToErrorCode(Rels.takeError());
     for (const Elf_Rel &Rel : *Rels) {
       ELFYAML::Relocation R;
-      if (Error E = dumpRelocation(&Rel, SymTab, R))
-        return std::move(E);
+      if (std::error_code EC = dumpRelocation(&Rel, SymTab, R))
+        return EC;
       S->Relocations.push_back(R);
     }
   } else {
     auto Rels = Obj.relas(Shdr);
     if (!Rels)
-      return Rels.takeError();
+      return errorToErrorCode(Rels.takeError());
     for (const Elf_Rela &Rel : *Rels) {
       ELFYAML::Relocation R;
-      if (Error E = dumpRelocation(&Rel, SymTab, R))
-        return std::move(E);
+      if (std::error_code EC = dumpRelocation(&Rel, SymTab, R))
+        return EC;
       R.Addend = Rel.r_addend;
       S->Relocations.push_back(R);
     }
@@ -461,15 +463,16 @@ ELFDumper<ELFT>::dumpRelocSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-Expected<ELFYAML::RawContentSection *>
+ErrorOr<ELFYAML::RawContentSection *>
 ELFDumper<ELFT>::dumpContentSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::RawContentSection>();
-  if (Error E = dumpCommonSection(Shdr, *S))
-    return std::move(E);
+
+  if (std::error_code EC = dumpCommonSection(Shdr, *S))
+    return EC;
 
   auto ContentOrErr = Obj.getSectionContents(Shdr);
   if (!ContentOrErr)
-    return ContentOrErr.takeError();
+    return errorToErrorCode(ContentOrErr.takeError());
   ArrayRef<uint8_t> Content = *ContentOrErr;
   if (!Content.empty())
     S->Content = yaml::BinaryRef(Content);
@@ -479,39 +482,40 @@ ELFDumper<ELFT>::dumpContentSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-Expected<ELFYAML::NoBitsSection *>
+ErrorOr<ELFYAML::NoBitsSection *>
 ELFDumper<ELFT>::dumpNoBitsSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::NoBitsSection>();
-  if (Error E = dumpCommonSection(Shdr, *S))
-    return std::move(E);
+
+  if (std::error_code EC = dumpCommonSection(Shdr, *S))
+    return EC;
   S->Size = Shdr->sh_size;
 
   return S.release();
 }
 
 template <class ELFT>
-Expected<ELFYAML::VerdefSection *>
+ErrorOr<ELFYAML::VerdefSection *>
 ELFDumper<ELFT>::dumpVerdefSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Verdef Elf_Verdef;
   typedef typename ELFT::Verdaux Elf_Verdaux;
 
   auto S = make_unique<ELFYAML::VerdefSection>();
-  if (Error E = dumpCommonSection(Shdr, *S))
-    return std::move(E);
+  if (std::error_code EC = dumpCommonSection(Shdr, *S))
+    return EC;
 
   S->Info = Shdr->sh_info;
 
   auto StringTableShdrOrErr = Obj.getSection(Shdr->sh_link);
   if (!StringTableShdrOrErr)
-    return StringTableShdrOrErr.takeError();
+    return errorToErrorCode(StringTableShdrOrErr.takeError());
 
   auto StringTableOrErr = Obj.getStringTable(*StringTableShdrOrErr);
   if (!StringTableOrErr)
-    return StringTableOrErr.takeError();
+    return errorToErrorCode(StringTableOrErr.takeError());
 
   auto Contents = Obj.getSectionContents(Shdr);
   if (!Contents)
-    return Contents.takeError();
+    return errorToErrorCode(Contents.takeError());
 
   llvm::ArrayRef<uint8_t> Data = *Contents;
   const uint8_t *Buf = Data.data();
@@ -540,17 +544,17 @@ ELFDumper<ELFT>::dumpVerdefSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-Expected<ELFYAML::SymverSection *>
+ErrorOr<ELFYAML::SymverSection *>
 ELFDumper<ELFT>::dumpSymverSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Half Elf_Half;
 
   auto S = make_unique<ELFYAML::SymverSection>();
-  if (Error E = dumpCommonSection(Shdr, *S))
-    return std::move(E);
+  if (std::error_code EC = dumpCommonSection(Shdr, *S))
+    return EC;
 
   auto VersionsOrErr = Obj.template getSectionContentsAsArray<Elf_Half>(Shdr);
   if (!VersionsOrErr)
-    return VersionsOrErr.takeError();
+    return errorToErrorCode(VersionsOrErr.takeError());
   for (const Elf_Half &E : *VersionsOrErr)
     S->Entries.push_back(E);
 
@@ -558,28 +562,28 @@ ELFDumper<ELFT>::dumpSymverSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-Expected<ELFYAML::VerneedSection *>
+ErrorOr<ELFYAML::VerneedSection *>
 ELFDumper<ELFT>::dumpVerneedSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Verneed Elf_Verneed;
   typedef typename ELFT::Vernaux Elf_Vernaux;
 
   auto S = make_unique<ELFYAML::VerneedSection>();
-  if (Error E = dumpCommonSection(Shdr, *S))
-    return std::move(E);
+  if (std::error_code EC = dumpCommonSection(Shdr, *S))
+    return EC;
 
   S->Info = Shdr->sh_info;
 
   auto Contents = Obj.getSectionContents(Shdr);
   if (!Contents)
-    return Contents.takeError();
+    return errorToErrorCode(Contents.takeError());
 
   auto StringTableShdrOrErr = Obj.getSection(Shdr->sh_link);
   if (!StringTableShdrOrErr)
-    return StringTableShdrOrErr.takeError();
+    return errorToErrorCode(StringTableShdrOrErr.takeError());
 
   auto StringTableOrErr = Obj.getStringTable(*StringTableShdrOrErr);
   if (!StringTableOrErr)
-    return StringTableOrErr.takeError();
+    return errorToErrorCode(StringTableOrErr.takeError());
 
   llvm::ArrayRef<uint8_t> Data = *Contents;
   const uint8_t *Buf = Data.data();
@@ -615,32 +619,32 @@ ELFDumper<ELFT>::dumpVerneedSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-Expected<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
+ErrorOr<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::Group>();
-  if (Error E = dumpCommonSection(Shdr, *S))
-    return std::move(E);
+  if (std::error_code EC = dumpCommonSection(Shdr, *S))
+    return EC;
 
   auto SymtabOrErr = Obj.getSection(Shdr->sh_link);
   if (!SymtabOrErr)
-    return SymtabOrErr.takeError();
+    return errorToErrorCode(SymtabOrErr.takeError());
   // Get symbol with index sh_info which name is the signature of the group.
   const Elf_Shdr *Symtab = *SymtabOrErr;
   auto SymOrErr = Obj.getSymbol(Symtab, Shdr->sh_info);
   if (!SymOrErr)
-    return SymOrErr.takeError();
+    return errorToErrorCode(SymOrErr.takeError());
   auto StrTabOrErr = Obj.getStringTableForSymtab(*Symtab);
   if (!StrTabOrErr)
-    return StrTabOrErr.takeError();
+    return errorToErrorCode(StrTabOrErr.takeError());
 
   Expected<StringRef> SymbolName =
       getUniquedSymbolName(*SymOrErr, *StrTabOrErr, Symtab);
   if (!SymbolName)
-    return SymbolName.takeError();
+    return errorToErrorCode(SymbolName.takeError());
   S->Signature = *SymbolName;
 
   auto MembersOrErr = Obj.template getSectionContentsAsArray<Elf_Word>(Shdr);
   if (!MembersOrErr)
-    return MembersOrErr.takeError();
+    return errorToErrorCode(MembersOrErr.takeError());
 
   for (Elf_Word Member : *MembersOrErr) {
     if (Member == llvm::ELF::GRP_COMDAT) {
@@ -650,27 +654,27 @@ Expected<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
 
     auto SHdrOrErr = Obj.getSection(Member);
     if (!SHdrOrErr)
-      return SHdrOrErr.takeError();
+      return errorToErrorCode(SHdrOrErr.takeError());
     auto NameOrErr = getUniquedSectionName(*SHdrOrErr);
     if (!NameOrErr)
-      return NameOrErr.takeError();
+      return errorToErrorCode(NameOrErr.takeError());
     S->Members.push_back({*NameOrErr});
   }
   return S.release();
 }
 
 template <class ELFT>
-Expected<ELFYAML::MipsABIFlags *>
+ErrorOr<ELFYAML::MipsABIFlags *>
 ELFDumper<ELFT>::dumpMipsABIFlags(const Elf_Shdr *Shdr) {
   assert(Shdr->sh_type == ELF::SHT_MIPS_ABIFLAGS &&
          "Section type is not SHT_MIPS_ABIFLAGS");
   auto S = make_unique<ELFYAML::MipsABIFlags>();
-  if (Error E = dumpCommonSection(Shdr, *S))
-    return std::move(E);
+  if (std::error_code EC = dumpCommonSection(Shdr, *S))
+    return EC;
 
   auto ContentOrErr = Obj.getSectionContents(Shdr);
   if (!ContentOrErr)
-    return ContentOrErr.takeError();
+    return errorToErrorCode(ContentOrErr.takeError());
 
   auto *Flags = reinterpret_cast<const object::Elf_Mips_ABIFlags<ELFT> *>(
       ContentOrErr.get().data());
@@ -689,20 +693,21 @@ ELFDumper<ELFT>::dumpMipsABIFlags(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-static Error elf2yaml(raw_ostream &Out, const object::ELFFile<ELFT> &Obj) {
+static std::error_code elf2yaml(raw_ostream &Out,
+                                const object::ELFFile<ELFT> &Obj) {
   ELFDumper<ELFT> Dumper(Obj);
-  Expected<ELFYAML::Object *> YAMLOrErr = Dumper.dump();
-  if (!YAMLOrErr)
-    return YAMLOrErr.takeError();
+  ErrorOr<ELFYAML::Object *> YAMLOrErr = Dumper.dump();
+  if (std::error_code EC = YAMLOrErr.getError())
+    return EC;
 
   std::unique_ptr<ELFYAML::Object> YAML(YAMLOrErr.get());
   yaml::Output Yout(Out);
   Yout << *YAML;
 
-  return Error::success();
+  return std::error_code();
 }
 
-Error elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
+std::error_code elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
   if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
     return elf2yaml(Out, *ELFObj->getELFFile());
 
@@ -715,5 +720,5 @@ Error elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
   if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
     return elf2yaml(Out, *ELFObj->getELFFile());
 
-  llvm_unreachable("unknown ELF file format");
+  return obj2yaml_error::unsupported_obj_file_format;
 }
diff --git a/llvm/tools/obj2yaml/obj2yaml.cpp b/llvm/tools/obj2yaml/obj2yaml.cpp
index f03b1ef4bade3..8622e38319b6c 100644
--- a/llvm/tools/obj2yaml/obj2yaml.cpp
+++ b/llvm/tools/obj2yaml/obj2yaml.cpp
@@ -17,20 +17,19 @@
 using namespace llvm;
 using namespace llvm::object;
 
-static Error dumpObject(const ObjectFile &Obj) {
+static std::error_code dumpObject(const ObjectFile &Obj) {
   if (Obj.isCOFF())
-    return errorCodeToError(coff2yaml(outs(), cast<COFFObjectFile>(Obj)));
+    return coff2yaml(outs(), cast<COFFObjectFile>(Obj));
 
   if (Obj.isXCOFF())
-    return errorCodeToError(xcoff2yaml(outs(), cast<XCOFFObjectFile>(Obj)));
+    return xcoff2yaml(outs(), cast<XCOFFObjectFile>(Obj));
 
   if (Obj.isELF())
     return elf2yaml(outs(), Obj);
-
   if (Obj.isWasm())
-    return errorCodeToError(wasm2yaml(outs(), cast<WasmObjectFile>(Obj)));
+    return wasm2yaml(outs(), cast<WasmObjectFile>(Obj));
 
-  return errorCodeToError(obj2yaml_error::unsupported_obj_file_format);
+  return obj2yaml_error::unsupported_obj_file_format;
 }
 
 static Error dumpInput(StringRef File) {
@@ -45,7 +44,7 @@ static Error dumpInput(StringRef File) {
     return errorCodeToError(macho2yaml(outs(), Binary));
   // TODO: If this is an archive, then burst it and dump each entry
   if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
-    return dumpObject(*Obj);
+    return errorCodeToError(dumpObject(*Obj));
   if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary))
     return minidump2yaml(outs(), *Minidump);
 
diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h
index 4f4a5330429ff..b40e2c5c5a640 100644
--- a/llvm/tools/obj2yaml/obj2yaml.h
+++ b/llvm/tools/obj2yaml/obj2yaml.h
@@ -21,7 +21,7 @@
 
 std::error_code coff2yaml(llvm::raw_ostream &Out,
                           const llvm::object::COFFObjectFile &Obj);
-llvm::Error elf2yaml(llvm::raw_ostream &Out,
+std::error_code elf2yaml(llvm::raw_ostream &Out,
                          const llvm::object::ObjectFile &Obj);
 std::error_code macho2yaml(llvm::raw_ostream &Out,
                            const llvm::object::Binary &Obj);

From f059147a108b73681d2b394aa358800e9662a02d Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Mon, 15 Jul 2019 11:22:05 +0000
Subject: [PATCH 090/451] [ARM] Move Shifts after Bits. NFC

This just moves the shift instruction definitions further down the
ARMInstrMVE.td file, to make positioning patterns slightly more natural.

llvm-svn: 366054
---
 llvm/lib/Target/ARM/ARMInstrMVE.td | 2249 ++++++++++++++--------------
 1 file changed, 1125 insertions(+), 1124 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 10ed876f484a0..9eec9a6f096ab 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -939,30 +939,11 @@ let Predicates = [HasMVEInt] in {
 
 // end of mve_comp instructions
 
-// start of mve_imm_shift instructions
-
-def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
-                      (ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
-                      NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
-                      vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
-  bits<5> imm;
-  bits<4> Qd;
-  bits<4> RdmDest;
-
-  let Inst{28} = 0b0;
-  let Inst{25-23} = 0b101;
-  let Inst{22} = Qd{3};
-  let Inst{21} = 0b1;
-  let Inst{20-16} = imm{4-0};
-  let Inst{15-13} = Qd{2-0};
-  let Inst{12-4} = 0b011111100;
-  let Inst{3-0} = RdmDest{3-0};
-}
+// start of mve_bit instructions
 
-class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
-                    string ops, vpred_ops vpred, string cstr,
-                    list<dag> pattern=[]>
-  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
+                    string ops, string cstr, list<dag> pattern=[]>
+  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, pattern> {
   bits<4> Qd;
   bits<4> Qm;
 
@@ -972,1378 +953,1398 @@ class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
   let Inst{3-1} = Qm{2-0};
 }
 
-class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
-              list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
-                  iname, suffix, "$Qd, $Qm", vpred_r, "",
-                  pattern> {
-  let Inst{28} = U;
-  let Inst{25-23} = 0b101;
-  let Inst{21} = 0b1;
-  let Inst{20-19} = sz{1-0};
-  let Inst{18-16} = 0b000;
-  let Inst{11-6} = 0b111101;
-  let Inst{4} = 0b0;
+def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
+                             "vbic", "", "$Qd, $Qn, $Qm", ""> {
+  bits<4> Qn;
+
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b110;
+  let Inst{21-20} = 0b01;
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b00001;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b1;
   let Inst{0} = 0b0;
 }
 
-multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
-                                list<dag> pattern=[]> {
-  def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
-    let Inst{12} = 0b0;
-  }
-  def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
-    let Inst{12} = 0b1;
-  }
+class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7>
+  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
+                  suffix, "$Qd, $Qm", ""> {
+
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{19-18} = size;
+  let Inst{17-16} = 0b00;
+  let Inst{12-9} = 0b0000;
+  let Inst{8-7} = bit_8_7;
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
 }
 
-defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
-defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
-defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
-defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+def MVE_VREV64_8  : MVE_VREV<"vrev64", "8", 0b00, 0b00>;
+def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00>;
+def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00>;
+
+def MVE_VREV32_8  : MVE_VREV<"vrev32", "8", 0b00, 0b01>;
+def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
+
+def MVE_VREV16_8  : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
 
 let Predicates = [HasMVEInt] in {
-  def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
-            (MVE_VMOVLs16bh MQPR:$src)>;
-  def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8),
-            (MVE_VMOVLs8bh MQPR:$src)>;
-  def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
-            (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
+def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
+          (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
+def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
+          (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
+def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
+          (v16i8 (MVE_VREV64_8  (v16i8 MQPR:$src)))>;
 
-  // zext_inreg 16 -> 32
-  def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
-            (MVE_VMOVLu16bh MQPR:$src)>;
-  // zext_inreg 8 -> 16
-  def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
-            (MVE_VMOVLu8bh MQPR:$src)>;
-}
+def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
+          (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
+def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
+          (v16i8 (MVE_VREV32_8  (v16i8 MQPR:$src)))>;
 
+def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
+          (v16i8 (MVE_VREV16_8  (v16i8 MQPR:$src)))>;
 
-class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
-                    dag immops, list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
-                  iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
-  let Inst{28} = U;
-  let Inst{25-23} = 0b101;
-  let Inst{21} = 0b1;
-  let Inst{12} = th;
-  let Inst{11-6} = 0b111101;
+def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
+          (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
+def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
+          (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
+def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
+          (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+}
+
+def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
+                             "vmvn", "", "$Qd, $Qm", ""> {
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-16} = 0b110000;
+  let Inst{12-6} = 0b0010111;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
 }
 
-// The immediate VSHLL instructions accept shift counts from 1 up to
-// the lane width (8 or 16), but the full-width shifts have an
-// entirely separate encoding, given below with 'lw' in the name.
-
-class MVE_VSHLL_imm8<string iname, string suffix,
-                     bit U, bit th, list<dag> pattern=[]>
-  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
-  bits<3> imm;
-  let Inst{20-19} = 0b01;
-  let Inst{18-16} = imm;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (vnotq  (v16i8 MQPR:$val1))),
+            (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
+  def : Pat<(v8i16 (vnotq  (v8i16 MQPR:$val1))),
+            (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
+  def : Pat<(v4i32 (vnotq  (v4i32 MQPR:$val1))),
+            (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
 }
 
-class MVE_VSHLL_imm16<string iname, string suffix,
-                      bit U, bit th, list<dag> pattern=[]>
-  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
-  bits<4> imm;
-  let Inst{20} = 0b1;
-  let Inst{19-16} = imm;
+class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
+  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
+                  iname, "", "$Qd, $Qn, $Qm", ""> {
+  bits<4> Qn;
+
+  let Inst{28} = bit_28;
+  let Inst{25-23} = 0b110;
+  let Inst{21-20} = bit_21_20;
+  let Inst{19-17} = Qn{2-0};
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b00001;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b1;
+  let Inst{0} = 0b0;
 }
 
-def MVE_VSHLL_imms8bh  : MVE_VSHLL_imm8 <"vshllb", "s8", 0b0, 0b0>;
-def MVE_VSHLL_imms8th  : MVE_VSHLL_imm8 <"vshllt", "s8", 0b0, 0b1>;
-def MVE_VSHLL_immu8bh  : MVE_VSHLL_imm8 <"vshllb", "u8", 0b1, 0b0>;
-def MVE_VSHLL_immu8th  : MVE_VSHLL_imm8 <"vshllt", "u8", 0b1, 0b1>;
-def MVE_VSHLL_imms16bh : MVE_VSHLL_imm16<"vshllb", "s16", 0b0, 0b0>;
-def MVE_VSHLL_imms16th : MVE_VSHLL_imm16<"vshllt", "s16", 0b0, 0b1>;
-def MVE_VSHLL_immu16bh : MVE_VSHLL_imm16<"vshllb", "u16", 0b1, 0b0>;
-def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
+def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>;
+def MVE_VORN : MVE_bit_ops<"vorn", 0b11, 0b0>;
+def MVE_VORR : MVE_bit_ops<"vorr", 0b10, 0b0>;
+def MVE_VAND : MVE_bit_ops<"vand", 0b00, 0b0>;
 
-class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
-                              bit U, string ops, list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
-                  iname, suffix, ops, vpred_r, "", pattern> {
-  let Inst{28} = U;
-  let Inst{25-23} = 0b100;
-  let Inst{21-20} = 0b11;
-  let Inst{19-18} = size{1-0};
-  let Inst{17-16} = 0b01;
-  let Inst{11-6} = 0b111000;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b1;
-}
+// add ignored suffixes as aliases
 
-multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
-                              string ops, list<dag> pattern=[]> {
-  def bh : MVE_VSHLL_by_lane_width<iname#"b", suffix, sz, U, ops, pattern> {
-    let Inst{12} = 0b0;
-  }
-  def th : MVE_VSHLL_by_lane_width<iname#"t", suffix, sz, U, ops, pattern> {
-    let Inst{12} = 0b1;
-  }
+foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f32"] in {
+  def : MVEInstAlias<"vbic${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VBIC MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+  def : MVEInstAlias<"veor${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VEOR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+  def : MVEInstAlias<"vorn${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VORN MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+  def : MVEInstAlias<"vorr${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VORR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
+  def : MVEInstAlias<"vand${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
+        (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
 }
 
-defm MVE_VSHLL_lws8  : MVE_VSHLL_lw<"vshll", "s8",  0b00, 0b0, "$Qd, $Qm, #8">;
-defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
-defm MVE_VSHLL_lwu8  : MVE_VSHLL_lw<"vshll", "u8",  0b00, 0b1, "$Qd, $Qm, #8">;
-defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 
-class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
-               dag immops, list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
-                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
-                  pattern> {
-  bits<5> imm;
+  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 
-  let Inst{28} = bit_28;
-  let Inst{25-23} = 0b101;
-  let Inst{21} = 0b0;
-  let Inst{20-16} = imm{4-0};
-  let Inst{12} = bit_12;
-  let Inst{11-6} = 0b111111;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b1;
-}
+  def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 
-def MVE_VRSHRNi16bh : MVE_VxSHRN<
-    "vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
-  let Inst{20-19} = 0b01;
-}
-def MVE_VRSHRNi16th : MVE_VxSHRN<
-    "vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
-  let Inst{20-19} = 0b01;
-}
-def MVE_VRSHRNi32bh : MVE_VxSHRN<
-    "vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
-  let Inst{20} = 0b1;
-}
-def MVE_VRSHRNi32th : MVE_VxSHRN<
-    "vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
-  let Inst{20} = 0b1;
-}
+  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 
-def MVE_VSHRNi16bh : MVE_VxSHRN<
-    "vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
-  let Inst{20-19} = 0b01;
-}
-def MVE_VSHRNi16th : MVE_VxSHRN<
-    "vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
-  let Inst{20-19} = 0b01;
+  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))),
+            (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 }
-def MVE_VSHRNi32bh : MVE_VxSHRN<
-    "vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
-  let Inst{20} = 0b1;
+
+class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
+  : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
+          iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
+  bits<8> imm;
+  bits<4> Qd;
+
+  let Inst{28} = imm{7};
+  let Inst{27-23} = 0b11111;
+  let Inst{22} = Qd{3};
+  let Inst{21-19} = 0b000;
+  let Inst{18-16} = imm{6-4};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = 0b0;
+  let Inst{11-8} = cmode;
+  let Inst{7-6} = 0b01;
+  let Inst{4} = 0b1;
+  let Inst{3-0} = imm{3-0};
 }
-def MVE_VSHRNi32th : MVE_VxSHRN<
-    "vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
-  let Inst{20} = 0b1;
+
+class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type>
+  : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
+  let Inst{5} = 0b0;
 }
 
-class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
-                 list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
-                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
-                  pattern> {
-  bits<5> imm;
+def MVE_VORRIZ0v4i32  : MVE_VORR<"i32", 0b0001, expzero00>;
+def MVE_VORRIZ0v8i16  : MVE_VORR<"i16", 0b1001, expzero00>;
+def MVE_VORRIZ8v4i32  : MVE_VORR<"i32", 0b0011, expzero08>;
+def MVE_VORRIZ8v8i16  : MVE_VORR<"i16", 0b1011, expzero08>;
+def MVE_VORRIZ16v4i32 : MVE_VORR<"i32", 0b0101, expzero16>;
+def MVE_VORRIZ24v4i32 : MVE_VORR<"i32", 0b0111, expzero24>;
 
-  let Inst{28} = bit_28;
-  let Inst{25-23} = 0b101;
-  let Inst{21} = 0b0;
-  let Inst{20-16} = imm{4-0};
-  let Inst{12} = bit_12;
-  let Inst{11-6} = 0b111111;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
-}
+def MVE_VORNIZ0v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ0v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ8v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ8v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ16v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+def MVE_VORNIZ24v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
+    (ins MQPR:$Qd_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
 
-def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
-    "vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
-  let Inst{20-19} = 0b01;
+def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
+    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
+
+class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type>
+  : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
+  let Inst{5} = 0b1;
 }
-def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
-    "vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
-  let Inst{20-19} = 0b01;
+
+def MVE_VBICIZ0v4i32  : MVE_VBIC<"i32", 0b0001, expzero00>;
+def MVE_VBICIZ0v8i16  : MVE_VBIC<"i16", 0b1001, expzero00>;
+def MVE_VBICIZ8v4i32  : MVE_VBIC<"i32", 0b0011, expzero08>;
+def MVE_VBICIZ8v8i16  : MVE_VBIC<"i16", 0b1011, expzero08>;
+def MVE_VBICIZ16v4i32 : MVE_VBIC<"i32", 0b0101, expzero16>;
+def MVE_VBICIZ24v4i32 : MVE_VBIC<"i32", 0b0111, expzero24>;
+
+def MVE_VANDIZ0v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ0v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ8v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ8v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ16v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VANDIZ24v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
+    (ins MQPR:$Qda_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+
+class MVE_VMOV_lane_direction {
+  bit bit_20;
+  dag oops;
+  dag iops;
+  string ops;
+  string cstr;
 }
-def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
-    "vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
-  let Inst{20} = 0b1;
+def MVE_VMOV_from_lane : MVE_VMOV_lane_direction {
+  let bit_20 = 0b1;
+  let oops = (outs rGPR:$Rt);
+  let iops = (ins MQPR:$Qd);
+  let ops = "$Rt, $Qd$Idx";
+  let cstr = "";
 }
-def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
-    "vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
-  let Inst{20} = 0b1;
+def MVE_VMOV_to_lane : MVE_VMOV_lane_direction {
+  let bit_20 = 0b0;
+  let oops = (outs MQPR:$Qd);
+  let iops = (ins MQPR:$Qd_src, rGPR:$Rt);
+  let ops = "$Qd$Idx, $Rt";
+  let cstr = "$Qd = $Qd_src";
 }
 
-def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
-    "vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
-  let Inst{20-19} = 0b01;
+class MVE_VMOV_lane<string suffix, bit U, dag indexop,
+                    MVE_VMOV_lane_direction dir>
+  : MVE_VMOV_lane_base<dir.oops, !con(dir.iops, indexop), NoItinerary,
+                       "vmov", suffix, dir.ops, dir.cstr, []> {
+  bits<4> Qd;
+  bits<4> Rt;
+
+  let Inst{31-24} = 0b11101110;
+  let Inst{23} = U;
+  let Inst{20} = dir.bit_20;
+  let Inst{19-17} = Qd{2-0};
+  let Inst{15-12} = Rt{3-0};
+  let Inst{11-8} = 0b1011;
+  let Inst{7} = Qd{3};
+  let Inst{4-0} = 0b10000;
 }
-def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
-    "vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
-  let Inst{20-19} = 0b01;
+
+class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
+    : MVE_VMOV_lane<"32", 0b0, (ins MVEVectorIndex<4>:$Idx), dir> {
+  bits<2> Idx;
+  let Inst{22} = 0b0;
+  let Inst{6-5} = 0b00;
+  let Inst{16} = Idx{1};
+  let Inst{21} = Idx{0};
+
+  let Predicates = [HasFPRegsV8_1M];
 }
-def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
-    "vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
-  let Inst{20} = 0b1;
+
+class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir>
+  : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<8>:$Idx), dir> {
+  bits<3> Idx;
+  let Inst{22} = 0b0;
+  let Inst{5} = 0b1;
+  let Inst{16} = Idx{2};
+  let Inst{21} = Idx{1};
+  let Inst{6} = Idx{0};
 }
-def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
-    "vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
-  let Inst{20} = 0b1;
+
+class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
+  : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<16>:$Idx), dir> {
+  bits<4> Idx;
+  let Inst{22} = 0b1;
+  let Inst{16} = Idx{3};
+  let Inst{21} = Idx{2};
+  let Inst{6} = Idx{1};
+  let Inst{5} = Idx{0};
 }
 
-class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
-                   dag immops, list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
-                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
-                  pattern> {
-  bits<5> imm;
+def MVE_VMOV_from_lane_32  : MVE_VMOV_lane_32<            MVE_VMOV_from_lane>;
+def MVE_VMOV_to_lane_32    : MVE_VMOV_lane_32<            MVE_VMOV_to_lane>;
+def MVE_VMOV_from_lane_s16 : MVE_VMOV_lane_16<"s16", 0b0, MVE_VMOV_from_lane>;
+def MVE_VMOV_from_lane_u16 : MVE_VMOV_lane_16<"u16", 0b1, MVE_VMOV_from_lane>;
+def MVE_VMOV_to_lane_16    : MVE_VMOV_lane_16< "16", 0b0, MVE_VMOV_to_lane>;
+def MVE_VMOV_from_lane_s8  : MVE_VMOV_lane_8 < "s8", 0b0, MVE_VMOV_from_lane>;
+def MVE_VMOV_from_lane_u8  : MVE_VMOV_lane_8 < "u8", 0b1, MVE_VMOV_from_lane>;
+def MVE_VMOV_to_lane_8     : MVE_VMOV_lane_8 <  "8", 0b0, MVE_VMOV_to_lane>;
 
-  let Inst{25-23} = 0b101;
-  let Inst{21} = 0b0;
-  let Inst{20-16} = imm{4-0};
-  let Inst{12} = bit_12;
-  let Inst{11-6} = 0b111101;
-  let Inst{4} = 0b0;
-  let Inst{0} = bit_0;
-}
+let Predicates = [HasMVEInt] in {
+  def : Pat<(extractelt (v2f64 MQPR:$src), imm:$lane),
+            (f64 (EXTRACT_SUBREG MQPR:$src, (DSubReg_f64_reg imm:$lane)))>;
+  def : Pat<(insertelt (v2f64 MQPR:$src1), DPR:$src2, imm:$lane),
+            (INSERT_SUBREG (v2f64 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), DPR:$src2, (DSubReg_f64_reg imm:$lane))>;
 
-multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
-  def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
-    let Inst{28} = 0b0;
-    let Inst{20-19} = 0b01;
-  }
-  def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
-    let Inst{28} = 0b1;
-    let Inst{20-19} = 0b01;
-  }
-  def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
-    let Inst{28} = 0b0;
-    let Inst{20} = 0b1;
-  }
-  def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, (ins shr_imm16:$imm)> {
-    let Inst{28} = 0b1;
-    let Inst{20} = 0b1;
-  }
-}
+  def : Pat<(extractelt (v4i32 MQPR:$src), imm:$lane),
+            (COPY_TO_REGCLASS
+              (i32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), rGPR)>;
+  def : Pat<(insertelt (v4i32 MQPR:$src1), rGPR:$src2, imm:$lane),
+            (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$src2, imm:$lane)>;
 
-defm MVE_VQRSHRNbh : MVE_VxQRSHRN_types<"vqrshrnb", 0b1, 0b0>;
-defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
-defm MVE_VQSHRNbh  : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
-defm MVE_VQSHRNth  : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
+  def : Pat<(vector_insert (v16i8 MQPR:$src1), rGPR:$src2, imm:$lane),
+            (MVE_VMOV_to_lane_8  MQPR:$src1, rGPR:$src2, imm:$lane)>;
+  def : Pat<(vector_insert (v8i16 MQPR:$src1), rGPR:$src2, imm:$lane),
+            (MVE_VMOV_to_lane_16 MQPR:$src1, rGPR:$src2, imm:$lane)>;
 
-// end of mve_imm_shift instructions
+  def : Pat<(ARMvgetlanes (v16i8 MQPR:$src), imm:$lane),
+            (MVE_VMOV_from_lane_s8 MQPR:$src, imm:$lane)>;
+  def : Pat<(ARMvgetlanes (v8i16 MQPR:$src), imm:$lane),
+            (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
+  def : Pat<(ARMvgetlaneu (v16i8 MQPR:$src), imm:$lane),
+            (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane)>;
+  def : Pat<(ARMvgetlaneu (v8i16 MQPR:$src), imm:$lane),
+            (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
 
-// start of mve_shift instructions
+  def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+            (MVE_VMOV_to_lane_8  (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+  def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+            (MVE_VMOV_to_lane_16 (v8i16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+  def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+            (MVE_VMOV_to_lane_32 (v4i32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
 
-class MVE_shift_by_vec<string iname, string suffix, bit U,
-                       bits<2> size, bit bit_4, bit bit_8>
-  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
-           iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
-  // Shift instructions which take a vector of shift counts
+  // Floating point patterns, still enabled under HasMVEInt
+  def : Pat<(extractelt (v4f32 MQPR:$src), imm:$lane),
+            (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), SPR)>;
+  def : Pat<(insertelt (v4f32 MQPR:$src1), (f32 SPR:$src2), imm:$lane),
+            (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), SPR:$src2, (SSubReg_f32_reg imm:$lane))>;
+
+  def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane),
+            (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>;
+  def : Pat<(extractelt (v8f16 MQPR:$src), imm:$lane),
+            (COPY_TO_REGCLASS (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane), HPR)>;
+
+  def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
+  def : Pat<(v4f32 (scalar_to_vector GPR:$src)),
+            (MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+  def : Pat<(v8f16 (scalar_to_vector HPR:$src)),
+            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
+  def : Pat<(v8f16 (scalar_to_vector GPR:$src)),
+            (MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+}
+
+// end of mve_bit instructions
+
+// start of MVE Integer instructions
+
+class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
+          iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
   bits<4> Qd;
-  bits<4> Qm;
   bits<4> Qn;
+  bits<4> Qm;
 
-  let Inst{28} = U;
-  let Inst{25-24} = 0b11;
-  let Inst{23} = 0b0;
   let Inst{22} = Qd{3};
   let Inst{21-20} = size;
   let Inst{19-17} = Qn{2-0};
-  let Inst{16} = 0b0;
   let Inst{15-13} = Qd{2-0};
-  let Inst{12-9} = 0b0010;
-  let Inst{8} = bit_8;
   let Inst{7} = Qn{3};
   let Inst{6} = 0b1;
   let Inst{5} = Qm{3};
-  let Inst{4} = bit_4;
   let Inst{3-1} = Qm{2-0};
-  let Inst{0} = 0b0;
-}
-
-multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
-  def s8  : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
-  def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
-  def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
-  def u8  : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
-  def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
-  def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
 }
 
-defm MVE_VSHL_by_vec   : mve_shift_by_vec_multi<"vshl",   0b0, 0b0>;
-defm MVE_VQSHL_by_vec  : mve_shift_by_vec_multi<"vqshl",  0b1, 0b0>;
-defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
-defm MVE_VRSHL_by_vec  : mve_shift_by_vec_multi<"vrshl",  0b0, 0b1>;
-
-class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
-                         string ops, vpred_ops vpred, string cstr,
-                         list<dag> pattern=[]>
-  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
-  bits<4> Qd;
-  bits<4> Qm;
+class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_int<"vmul", suffix, size, pattern> {
 
-  let Inst{23} = 0b1;
-  let Inst{22} = Qd{3};
-  let Inst{15-13} = Qd{2-0};
-  let Inst{12-11} = 0b00;
-  let Inst{7-6} = 0b01;
-  let Inst{5} = Qm{3};
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b01001;
   let Inst{4} = 0b1;
-  let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
 }
 
-class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
-  : MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
-                       "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
-  bits<6> imm;
-  let Inst{28} = 0b1;
-  let Inst{25-24} = 0b11;
-  let Inst{21-16} = imm;
-  let Inst{10-9} = 0b10;
-  let Inst{8} = bit_8;
-}
-
-def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
-  let Inst{21-19} = 0b001;
-}
-
-def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
-  let Inst{21-20} = 0b01;
-}
-
-def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
-  let Inst{21} = 0b1;
-}
-
-def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
-  let Inst{21-19} = 0b001;
-}
-
-def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
-  let Inst{21-20} = 0b01;
-}
+def MVE_VMULt1i8  : MVE_VMULt1<"i8", 0b00>;
+def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>;
+def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>;
 
-def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> {
-  let Inst{21} = 0b1;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 }
 
-class MVE_VQSHL_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
-                       vpred_r, ""> {
-  bits<6> imm;
-
-  let Inst{25-24} = 0b11;
-  let Inst{21-16} = imm;
-  let Inst{10-8} = 0b111;
-}
+class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
+                  list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
 
-def MVE_VSLIimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-19} = 0b001;
+  let Inst{28} = rounding;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b01011;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
 }
 
-def MVE_VSLIimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-19} = 0b001;
-}
+class MVE_VQDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_VQxDMULH<"vqdmulh", suffix, size, 0b0, pattern>;
+class MVE_VQRDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_VQxDMULH<"vqrdmulh", suffix, size, 0b1, pattern>;
 
-def MVE_VSLIimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-20} = 0b01;
-}
+def MVE_VQDMULHi8   : MVE_VQDMULH<"s8",  0b00>;
+def MVE_VQDMULHi16  : MVE_VQDMULH<"s16", 0b01>;
+def MVE_VQDMULHi32  : MVE_VQDMULH<"s32", 0b10>;
 
-def MVE_VSLIimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-20} = 0b01;
-}
+def MVE_VQRDMULHi8  : MVE_VQRDMULH<"s8",  0b00>;
+def MVE_VQRDMULHi16 : MVE_VQRDMULH<"s16", 0b01>;
+def MVE_VQRDMULHi32 : MVE_VQRDMULH<"s32", 0b10>;
 
-def MVE_VSLIimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21} = 0b1;
-}
+class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
+                    list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
 
-def MVE_VSLIimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21} = 0b1;
+  let Inst{28} = subtract;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b01000;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
 }
 
-class MVE_VQSHLU_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
-                       vpred_r, ""> {
-  bits<6> imm;
+class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_VADDSUB<"vadd", suffix, size, 0b0, pattern>;
+class MVE_VSUB<string suffix, bits<2> size, list<dag> pattern=[]>
+  : MVE_VADDSUB<"vsub", suffix, size, 0b1, pattern>;
 
-  let Inst{28} = 0b1;
-  let Inst{25-24} = 0b11;
-  let Inst{21-16} = imm;
-  let Inst{10-8} = 0b110;
-}
+def MVE_VADDi8  : MVE_VADD<"i8",  0b00>;
+def MVE_VADDi16 : MVE_VADD<"i16", 0b01>;
+def MVE_VADDi32 : MVE_VADD<"i32", 0b10>;
 
-def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
-  let Inst{21-19} = 0b001;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 }
 
-def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
-  let Inst{21-20} = 0b01;
-}
+def MVE_VSUBi8  : MVE_VSUB<"i8",  0b00>;
+def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>;
+def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>;
 
-def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
-  let Inst{21} = 0b1;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+            (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+  def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+            (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+  def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
+            (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
 }
 
-class MVE_VRSHR_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
-                       vpred_r, ""> {
-  bits<6> imm;
+class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
+                   bits<2> size, list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
 
-  let Inst{25-24} = 0b11;
-  let Inst{21-16} = imm;
-  let Inst{10-8} = 0b010;
+  let Inst{28} = U;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-10} = 0b000;
+  let Inst{9} = subtract;
+  let Inst{8} = 0b0;
+  let Inst{4} = 0b1;
+  let Inst{0} = 0b0;
 }
 
-def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-19} = 0b001;
-}
+class MVE_VQADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>;
+class MVE_VQSUB<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>;
 
-def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-19} = 0b001;
-}
+def MVE_VQADDs8  : MVE_VQADD<"s8",  0b0, 0b00>;
+def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>;
+def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>;
+def MVE_VQADDu8  : MVE_VQADD<"u8",  0b1, 0b00>;
+def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>;
+def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>;
 
-def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-20} = 0b01;
-}
+def MVE_VQSUBs8  : MVE_VQSUB<"s8",  0b0, 0b00>;
+def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>;
+def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>;
+def MVE_VQSUBu8  : MVE_VQSUB<"u8",  0b1, 0b00>;
+def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>;
+def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>;
 
-def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-20} = 0b01;
-}
+class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_int<"vabd", suffix, size, pattern> {
 
-def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21} = 0b1;
+  let Inst{28} = U;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b00111;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
 }
 
-def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21} = 0b1;
-}
+def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
+def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
+def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
+def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
+def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
+def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
 
-class MVE_VSHR_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
-                       vpred_r, ""> {
-  bits<6> imm;
+class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+  : MVE_int<"vrhadd", suffix, size, pattern> {
 
-  let Inst{25-24} = 0b11;
-  let Inst{21-16} = imm;
-  let Inst{10-8} = 0b000;
+  let Inst{28} = U;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-8} = 0b00001;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
 }
 
-def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-19} = 0b001;
-}
+def MVE_VRHADDs8  : MVE_VRHADD<"s8", 0b0, 0b00>;
+def MVE_VRHADDs16 : MVE_VRHADD<"s16", 0b0, 0b01>;
+def MVE_VRHADDs32 : MVE_VRHADD<"s32", 0b0, 0b10>;
+def MVE_VRHADDu8  : MVE_VRHADD<"u8", 0b1, 0b00>;
+def MVE_VRHADDu16 : MVE_VRHADD<"u16", 0b1, 0b01>;
+def MVE_VRHADDu32 : MVE_VRHADD<"u32", 0b1, 0b10>;
 
-def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-19} = 0b001;
-}
+class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
+                   bits<2> size, list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
 
-def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-20} = 0b01;
+  let Inst{28} = U;
+  let Inst{25-23} = 0b110;
+  let Inst{16} = 0b0;
+  let Inst{12-10} = 0b000;
+  let Inst{9} = subtract;
+  let Inst{8} = 0b0;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
 }
 
-def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-20} = 0b01;
-}
+class MVE_VHADD<string suffix, bit U, bits<2> size,
+              list<dag> pattern=[]>
+  : MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
+class MVE_VHSUB<string suffix, bit U, bits<2> size,
+              list<dag> pattern=[]>
+  : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
 
-def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21} = 0b1;
-}
+def MVE_VHADDs8  : MVE_VHADD<"s8",  0b0, 0b00>;
+def MVE_VHADDs16 : MVE_VHADD<"s16", 0b0, 0b01>;
+def MVE_VHADDs32 : MVE_VHADD<"s32", 0b0, 0b10>;
+def MVE_VHADDu8  : MVE_VHADD<"u8",  0b1, 0b00>;
+def MVE_VHADDu16 : MVE_VHADD<"u16", 0b1, 0b01>;
+def MVE_VHADDu32 : MVE_VHADD<"u32", 0b1, 0b10>;
 
-def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21} = 0b1;
-}
+def MVE_VHSUBs8  : MVE_VHSUB<"s8",  0b0, 0b00>;
+def MVE_VHSUBs16 : MVE_VHSUB<"s16", 0b0, 0b01>;
+def MVE_VHSUBs32 : MVE_VHSUB<"s32", 0b0, 0b10>;
+def MVE_VHSUBu8  : MVE_VHSUB<"u8",  0b1, 0b00>;
+def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
+def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
 
-class MVE_VSHL_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
-                       vpred_r, ""> {
-  bits<6> imm;
+class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
+          "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
+  bits<4> Qd;
+  bits<4> Rt;
 
   let Inst{28} = 0b0;
-  let Inst{25-24} = 0b11;
-  let Inst{21-16} = imm;
-  let Inst{10-8} = 0b101;
+  let Inst{25-23} = 0b101;
+  let Inst{22} = B;
+  let Inst{21-20} = 0b10;
+  let Inst{19-17} = Qd{2-0};
+  let Inst{16} = 0b0;
+  let Inst{15-12} = Rt;
+  let Inst{11-8} = 0b1011;
+  let Inst{7} = Qd{3};
+  let Inst{6} = 0b0;
+  let Inst{5} = E;
+  let Inst{4-0} = 0b10000;
 }
 
-def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
-  let Inst{21-19} = 0b001;
-}
+def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
+def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
+def MVE_VDUP8  : MVE_VDUP<"8",  0b1, 0b0>;
 
-def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
-  let Inst{21-20} = 0b01;
-}
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
+            (MVE_VDUP8  rGPR:$elem)>;
+  def : Pat<(v8i16 (ARMvdup (i32 rGPR:$elem))),
+            (MVE_VDUP16 rGPR:$elem)>;
+  def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
+            (MVE_VDUP32 rGPR:$elem)>;
 
-def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
-  let Inst{21} = 0b1;
-}
-// end of mve_shift instructions
+  def : Pat<(v4i32 (ARMvduplane (v4i32 MQPR:$src), imm:$lane)),
+            (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
+  // For the 16-bit and 8-bit vduplanes we don't care about the signedness
+  // of the lane move operation as we only want the lowest 8/16 bits anyway.
+  def : Pat<(v8i16 (ARMvduplane (v8i16 MQPR:$src), imm:$lane)),
+            (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+  def : Pat<(v16i8 (ARMvduplane (v16i8 MQPR:$src), imm:$lane)),
+            (MVE_VDUP8  (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane))>;
 
-// start of mve_bit instructions
+  def : Pat<(v4f32 (ARMvdup (f32 SPR:$elem))),
+            (v4f32 (MVE_VDUP32 (i32 (COPY_TO_REGCLASS (f32 SPR:$elem), rGPR))))>;
+  def : Pat<(v8f16 (ARMvdup (f16 HPR:$elem))),
+            (v8f16 (MVE_VDUP16 (i32 (COPY_TO_REGCLASS (f16 HPR:$elem), rGPR))))>;
 
-class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
-                    string ops, string cstr, list<dag> pattern=[]>
-  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, pattern> {
+  def : Pat<(v4f32 (ARMvduplane (v4f32 MQPR:$src), imm:$lane)),
+            (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
+  def : Pat<(v8f16 (ARMvduplane (v8f16 MQPR:$src), imm:$lane)),
+            (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+}
+
+
+class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
+                         list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
+          iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
   bits<4> Qd;
   bits<4> Qm;
 
   let Inst{22} = Qd{3};
+  let Inst{19-18} = size{1-0};
   let Inst{15-13} = Qd{2-0};
   let Inst{5} = Qm{3};
   let Inst{3-1} = Qm{2-0};
 }
 
-def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
-                             "vbic", "", "$Qd, $Qn, $Qm", ""> {
-  bits<4> Qn;
-
-  let Inst{28} = 0b0;
-  let Inst{25-23} = 0b110;
-  let Inst{21-20} = 0b01;
-  let Inst{19-17} = Qn{2-0};
-  let Inst{16} = 0b0;
-  let Inst{12-8} = 0b00001;
-  let Inst{7} = Qn{3};
-  let Inst{6} = 0b1;
-  let Inst{4} = 0b1;
-  let Inst{0} = 0b0;
-}
-
-class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7>
-  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
-                  suffix, "$Qd, $Qm", ""> {
+class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
+                   bit count_zeroes, list<dag> pattern=[]>
+  : MVEIntSingleSrc<iname, suffix, size, pattern> {
 
   let Inst{28} = 0b1;
   let Inst{25-23} = 0b111;
   let Inst{21-20} = 0b11;
-  let Inst{19-18} = size;
   let Inst{17-16} = 0b00;
-  let Inst{12-9} = 0b0000;
-  let Inst{8-7} = bit_8_7;
+  let Inst{12-8} = 0b00100;
+  let Inst{7} = count_zeroes;
   let Inst{6} = 0b1;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
 }
 
-def MVE_VREV64_8  : MVE_VREV<"vrev64", "8", 0b00, 0b00>;
-def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00>;
-def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00>;
-
-def MVE_VREV32_8  : MVE_VREV<"vrev32", "8", 0b00, 0b01>;
-def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
-
-def MVE_VREV16_8  : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
-
-let Predicates = [HasMVEInt] in {
-def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
-          (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
-def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
-          (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
-def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
-          (v16i8 (MVE_VREV64_8  (v16i8 MQPR:$src)))>;
-
-def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
-          (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
-def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
-          (v16i8 (MVE_VREV32_8  (v16i8 MQPR:$src)))>;
+def MVE_VCLSs8  : MVE_VCLSCLZ<"vcls", "s8",  0b00, 0b0>;
+def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
+def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
 
-def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
-          (v16i8 (MVE_VREV16_8  (v16i8 MQPR:$src)))>;
+def MVE_VCLZs8  : MVE_VCLSCLZ<"vclz", "i8",  0b00, 0b1>;
+def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
+def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
 
-def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
-          (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
-def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
-          (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
-def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
-          (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
-}
+class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
+                      list<dag> pattern=[]>
+  : MVEIntSingleSrc<iname, suffix, size, pattern> {
 
-def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
-                             "vmvn", "", "$Qd, $Qm", ""> {
   let Inst{28} = 0b1;
   let Inst{25-23} = 0b111;
-  let Inst{21-16} = 0b110000;
-  let Inst{12-6} = 0b0010111;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
-}
-
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (vnotq  (v16i8 MQPR:$val1))),
-            (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
-  def : Pat<(v8i16 (vnotq  (v8i16 MQPR:$val1))),
-            (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
-  def : Pat<(v4i32 (vnotq  (v4i32 MQPR:$val1))),
-            (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
-}
-
-class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
-  : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
-                  iname, "", "$Qd, $Qn, $Qm", ""> {
-  bits<4> Qn;
-
-  let Inst{28} = bit_28;
-  let Inst{25-23} = 0b110;
-  let Inst{21-20} = bit_21_20;
-  let Inst{19-17} = Qn{2-0};
-  let Inst{16} = 0b0;
-  let Inst{12-8} = 0b00001;
-  let Inst{7} = Qn{3};
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b01;
+  let Inst{12-8} = 0b00011;
+  let Inst{7} = negate;
   let Inst{6} = 0b1;
-  let Inst{4} = 0b1;
+  let Inst{4} = 0b0;
   let Inst{0} = 0b0;
 }
 
-def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>;
-def MVE_VORN : MVE_bit_ops<"vorn", 0b11, 0b0>;
-def MVE_VORR : MVE_bit_ops<"vorr", 0b10, 0b0>;
-def MVE_VAND : MVE_bit_ops<"vand", 0b00, 0b0>;
-
-// add ignored suffixes as aliases
-
-foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f32"] in {
-  def : MVEInstAlias<"vbic${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
-        (MVE_VBIC MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
-  def : MVEInstAlias<"veor${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
-        (MVE_VEOR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
-  def : MVEInstAlias<"vorn${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
-        (MVE_VORN MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
-  def : MVEInstAlias<"vorr${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
-        (MVE_VORR MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
-  def : MVEInstAlias<"vand${vp}." # s # "\t$QdSrc, $QnSrc, $QmSrc",
-        (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
-}
+def MVE_VABSs8  : MVE_VABSNEG_int<"vabs", "s8",  0b00, 0b0>;
+def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
+def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
 
 let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))),
-            (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-}
-
-class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
-  : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
-          iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
-  bits<8> imm;
-  bits<4> Qd;
-
-  let Inst{28} = imm{7};
-  let Inst{27-23} = 0b11111;
-  let Inst{22} = Qd{3};
-  let Inst{21-19} = 0b000;
-  let Inst{18-16} = imm{6-4};
-  let Inst{15-13} = Qd{2-0};
-  let Inst{12} = 0b0;
-  let Inst{11-8} = cmode;
-  let Inst{7-6} = 0b01;
-  let Inst{4} = 0b1;
-  let Inst{3-0} = imm{3-0};
-}
-
-class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type>
-  : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
-  let Inst{5} = 0b0;
-}
-
-def MVE_VORRIZ0v4i32  : MVE_VORR<"i32", 0b0001, expzero00>;
-def MVE_VORRIZ0v8i16  : MVE_VORR<"i16", 0b1001, expzero00>;
-def MVE_VORRIZ8v4i32  : MVE_VORR<"i32", 0b0011, expzero08>;
-def MVE_VORRIZ8v8i16  : MVE_VORR<"i16", 0b1011, expzero08>;
-def MVE_VORRIZ16v4i32 : MVE_VORR<"i32", 0b0101, expzero16>;
-def MVE_VORRIZ24v4i32 : MVE_VORR<"i32", 0b0111, expzero24>;
-
-def MVE_VORNIZ0v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
-    (ins MQPR:$Qd_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ0v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
-    (ins MQPR:$Qd_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ8v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
-    (ins MQPR:$Qd_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ8v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
-    (ins MQPR:$Qd_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ16v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
-    (ins MQPR:$Qd_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ24v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
-    (ins MQPR:$Qd_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-
-def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
-    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
-
-class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type>
-  : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
-  let Inst{5} = 0b1;
+  def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
+            (v16i8 (MVE_VABSs8 $v))>;
+  def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
+            (v8i16 (MVE_VABSs16 $v))>;
+  def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
+            (v4i32 (MVE_VABSs32 $v))>;
 }
 
-def MVE_VBICIZ0v4i32  : MVE_VBIC<"i32", 0b0001, expzero00>;
-def MVE_VBICIZ0v8i16  : MVE_VBIC<"i16", 0b1001, expzero00>;
-def MVE_VBICIZ8v4i32  : MVE_VBIC<"i32", 0b0011, expzero08>;
-def MVE_VBICIZ8v8i16  : MVE_VBIC<"i16", 0b1011, expzero08>;
-def MVE_VBICIZ16v4i32 : MVE_VBIC<"i32", 0b0101, expzero16>;
-def MVE_VBICIZ24v4i32 : MVE_VBIC<"i32", 0b0111, expzero24>;
-
-def MVE_VANDIZ0v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
-    (ins MQPR:$Qda_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ0v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
-    (ins MQPR:$Qda_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ8v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
-    (ins MQPR:$Qda_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ8v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
-    (ins MQPR:$Qda_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ16v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
-    (ins MQPR:$Qda_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ24v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
-    (ins MQPR:$Qda_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
+def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
+def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
 
-class MVE_VMOV_lane_direction {
-  bit bit_20;
-  dag oops;
-  dag iops;
-  string ops;
-  string cstr;
-}
-def MVE_VMOV_from_lane : MVE_VMOV_lane_direction {
-  let bit_20 = 0b1;
-  let oops = (outs rGPR:$Rt);
-  let iops = (ins MQPR:$Qd);
-  let ops = "$Rt, $Qd$Idx";
-  let cstr = "";
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
+            (v16i8 (MVE_VNEGs8 $v))>;
+  def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
+            (v8i16 (MVE_VNEGs16 $v))>;
+  def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
+            (v4i32 (MVE_VNEGs32 $v))>;
 }
-def MVE_VMOV_to_lane : MVE_VMOV_lane_direction {
-  let bit_20 = 0b0;
-  let oops = (outs MQPR:$Qd);
-  let iops = (ins MQPR:$Qd_src, rGPR:$Rt);
-  let ops = "$Qd$Idx, $Rt";
-  let cstr = "$Qd = $Qd_src";
+
+class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
+                   bit negate, list<dag> pattern=[]>
+  : MVEIntSingleSrc<iname, suffix, size, pattern> {
+
+  let Inst{28} = 0b1;
+  let Inst{25-23} = 0b111;
+  let Inst{21-20} = 0b11;
+  let Inst{17-16} = 0b00;
+  let Inst{12-8} = 0b00111;
+  let Inst{7} = negate;
+  let Inst{6} = 0b1;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
 }
 
-class MVE_VMOV_lane<string suffix, bit U, dag indexop,
-                    MVE_VMOV_lane_direction dir>
-  : MVE_VMOV_lane_base<dir.oops, !con(dir.iops, indexop), NoItinerary,
-                       "vmov", suffix, dir.ops, dir.cstr, []> {
+def MVE_VQABSs8  : MVE_VQABSNEG<"vqabs", "s8",  0b00, 0b0>;
+def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
+def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
+
+def MVE_VQNEGs8  : MVE_VQABSNEG<"vqneg", "s8",  0b00, 0b1>;
+def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
+def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
+
+class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
+                  dag iops, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
+          vpred_r, "", pattern> {
+  bits<13> imm;
   bits<4> Qd;
-  bits<4> Rt;
 
-  let Inst{31-24} = 0b11101110;
-  let Inst{23} = U;
-  let Inst{20} = dir.bit_20;
-  let Inst{19-17} = Qd{2-0};
-  let Inst{15-12} = Rt{3-0};
-  let Inst{11-8} = 0b1011;
-  let Inst{7} = Qd{3};
-  let Inst{4-0} = 0b10000;
+  let Inst{28} = imm{7};
+  let Inst{25-23} = 0b111;
+  let Inst{22} = Qd{3};
+  let Inst{21-19} = 0b000;
+  let Inst{18-16} = imm{6-4};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = 0b0;
+  let Inst{11-8} = cmode{3-0};
+  let Inst{7-6} = 0b01;
+  let Inst{5} = op;
+  let Inst{4} = 0b1;
+  let Inst{3-0} = imm{3-0};
+
+  let DecoderMethod = "DecodeMVEModImmInstruction";
 }
 
-class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
-    : MVE_VMOV_lane<"32", 0b0, (ins MVEVectorIndex<4>:$Idx), dir> {
-  bits<2> Idx;
-  let Inst{22} = 0b0;
-  let Inst{6-5} = 0b00;
-  let Inst{16} = Idx{1};
-  let Inst{21} = Idx{0};
+let isReMaterializable = 1 in {
+let isAsCheapAsAMove = 1 in {
+def MVE_VMOVimmi8  : MVE_mod_imm<"vmov", "i8",  {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
+def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
+  let Inst{9} = imm{9};
+}
+def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
+  let Inst{11-8} = imm{11-8};
+}
+def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
+def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
+} // let isAsCheapAsAMove = 1
 
-  let Predicates = [HasFPRegsV8_1M];
+def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
+  let Inst{9} = imm{9};
+}
+def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
+  let Inst{11-8} = imm{11-8};
 }
+} // let isReMaterializable = 1
 
-class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir>
-  : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<8>:$Idx), dir> {
-  bits<3> Idx;
-  let Inst{22} = 0b0;
-  let Inst{5} = 0b1;
-  let Inst{16} = Idx{2};
-  let Inst{21} = Idx{1};
-  let Inst{6} = Idx{0};
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v16i8 (ARMvmovImm timm:$simm)),
+            (v16i8 (MVE_VMOVimmi8  nImmSplatI8:$simm))>;
+  def : Pat<(v8i16 (ARMvmovImm timm:$simm)),
+            (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
+  def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
+            (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
+
+  def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
+            (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
+  def : Pat<(v4i32 (ARMvmvnImm timm:$simm)),
+            (v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm))>;
+
+  def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
+            (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
 }
 
-class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
-  : MVE_VMOV_lane<suffix, U, (ins MVEVectorIndex<16>:$Idx), dir> {
-  bits<4> Idx;
-  let Inst{22} = 0b1;
-  let Inst{16} = Idx{3};
-  let Inst{21} = Idx{2};
-  let Inst{6} = Idx{1};
-  let Inst{5} = Idx{0};
+class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
+                   bit bit_12, list<dag> pattern=[]>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
+          NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
+          pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
+
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b100;
+  let Inst{22} = Qd{3};
+  let Inst{21-20} = 0b11;
+  let Inst{19-18} = size;
+  let Inst{17-16} = 0b11;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12} = bit_12;
+  let Inst{11-6} = 0b111010;
+  let Inst{5} = Qm{3};
+  let Inst{4} = 0b0;
+  let Inst{3-1} = Qm{2-0};
+  let Inst{0} = 0b1;
 }
 
-def MVE_VMOV_from_lane_32  : MVE_VMOV_lane_32<            MVE_VMOV_from_lane>;
-def MVE_VMOV_to_lane_32    : MVE_VMOV_lane_32<            MVE_VMOV_to_lane>;
-def MVE_VMOV_from_lane_s16 : MVE_VMOV_lane_16<"s16", 0b0, MVE_VMOV_from_lane>;
-def MVE_VMOV_from_lane_u16 : MVE_VMOV_lane_16<"u16", 0b1, MVE_VMOV_from_lane>;
-def MVE_VMOV_to_lane_16    : MVE_VMOV_lane_16< "16", 0b0, MVE_VMOV_to_lane>;
-def MVE_VMOV_from_lane_s8  : MVE_VMOV_lane_8 < "s8", 0b0, MVE_VMOV_from_lane>;
-def MVE_VMOV_from_lane_u8  : MVE_VMOV_lane_8 < "u8", 0b1, MVE_VMOV_from_lane>;
-def MVE_VMOV_to_lane_8     : MVE_VMOV_lane_8 <  "8", 0b0, MVE_VMOV_to_lane>;
+def MVE_VMAXAs8  : MVE_VMINMAXA<"vmaxa", "s8",  0b00, 0b0>;
+def MVE_VMAXAs16 : MVE_VMINMAXA<"vmaxa", "s16", 0b01, 0b0>;
+def MVE_VMAXAs32 : MVE_VMINMAXA<"vmaxa", "s32", 0b10, 0b0>;
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(extractelt (v2f64 MQPR:$src), imm:$lane),
-            (f64 (EXTRACT_SUBREG MQPR:$src, (DSubReg_f64_reg imm:$lane)))>;
-  def : Pat<(insertelt (v2f64 MQPR:$src1), DPR:$src2, imm:$lane),
-            (INSERT_SUBREG (v2f64 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), DPR:$src2, (DSubReg_f64_reg imm:$lane))>;
+def MVE_VMINAs8  : MVE_VMINMAXA<"vmina", "s8",  0b00, 0b1>;
+def MVE_VMINAs16 : MVE_VMINMAXA<"vmina", "s16", 0b01, 0b1>;
+def MVE_VMINAs32 : MVE_VMINMAXA<"vmina", "s32", 0b10, 0b1>;
 
-  def : Pat<(extractelt (v4i32 MQPR:$src), imm:$lane),
-            (COPY_TO_REGCLASS
-              (i32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), rGPR)>;
-  def : Pat<(insertelt (v4i32 MQPR:$src1), rGPR:$src2, imm:$lane),
-            (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$src2, imm:$lane)>;
+// end of MVE Integer instructions
 
-  def : Pat<(vector_insert (v16i8 MQPR:$src1), rGPR:$src2, imm:$lane),
-            (MVE_VMOV_to_lane_8  MQPR:$src1, rGPR:$src2, imm:$lane)>;
-  def : Pat<(vector_insert (v8i16 MQPR:$src1), rGPR:$src2, imm:$lane),
-            (MVE_VMOV_to_lane_16 MQPR:$src1, rGPR:$src2, imm:$lane)>;
+// start of mve_imm_shift instructions
 
-  def : Pat<(ARMvgetlanes (v16i8 MQPR:$src), imm:$lane),
-            (MVE_VMOV_from_lane_s8 MQPR:$src, imm:$lane)>;
-  def : Pat<(ARMvgetlanes (v8i16 MQPR:$src), imm:$lane),
-            (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
-  def : Pat<(ARMvgetlaneu (v16i8 MQPR:$src), imm:$lane),
-            (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane)>;
-  def : Pat<(ARMvgetlaneu (v8i16 MQPR:$src), imm:$lane),
-            (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
+def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
+                      (ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
+                      NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
+                      vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
+  bits<5> imm;
+  bits<4> Qd;
+  bits<4> RdmDest;
 
-  def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
-            (MVE_VMOV_to_lane_8  (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
-  def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
-            (MVE_VMOV_to_lane_16 (v8i16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
-  def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
-            (MVE_VMOV_to_lane_32 (v4i32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+  let Inst{28} = 0b0;
+  let Inst{25-23} = 0b101;
+  let Inst{22} = Qd{3};
+  let Inst{21} = 0b1;
+  let Inst{20-16} = imm{4-0};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12-4} = 0b011111100;
+  let Inst{3-0} = RdmDest{3-0};
+}
 
-  // Floating point patterns, still enabled under HasMVEInt
-  def : Pat<(extractelt (v4f32 MQPR:$src), imm:$lane),
-            (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), SPR)>;
-  def : Pat<(insertelt (v4f32 MQPR:$src1), (f32 SPR:$src2), imm:$lane),
-            (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), SPR:$src2, (SSubReg_f32_reg imm:$lane))>;
+class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
+                    string ops, vpred_ops vpred, string cstr,
+                    list<dag> pattern=[]>
+  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
 
-  def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane),
-            (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>;
-  def : Pat<(extractelt (v8f16 MQPR:$src), imm:$lane),
-            (COPY_TO_REGCLASS (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane), HPR)>;
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{5} = Qm{3};
+  let Inst{3-1} = Qm{2-0};
+}
 
-  def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
-            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
-  def : Pat<(v4f32 (scalar_to_vector GPR:$src)),
-            (MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
-  def : Pat<(v8f16 (scalar_to_vector HPR:$src)),
-            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
-  def : Pat<(v8f16 (scalar_to_vector GPR:$src)),
-            (MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+              list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
+                  iname, suffix, "$Qd, $Qm", vpred_r, "",
+                  pattern> {
+  let Inst{28} = U;
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b1;
+  let Inst{20-19} = sz{1-0};
+  let Inst{18-16} = 0b000;
+  let Inst{11-6} = 0b111101;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
+}
+
+multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
+                                list<dag> pattern=[]> {
+  def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
+    let Inst{12} = 0b0;
+  }
+  def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
+    let Inst{12} = 0b1;
+  }
 }
 
-// end of mve_bit instructions
-
-// start of MVE Integer instructions
+defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
+defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
+defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
+defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
 
-class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
-          iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
-  bits<4> Qd;
-  bits<4> Qn;
-  bits<4> Qm;
+let Predicates = [HasMVEInt] in {
+  def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
+            (MVE_VMOVLs16bh MQPR:$src)>;
+  def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8),
+            (MVE_VMOVLs8bh MQPR:$src)>;
+  def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
+            (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
 
-  let Inst{22} = Qd{3};
-  let Inst{21-20} = size;
-  let Inst{19-17} = Qn{2-0};
-  let Inst{15-13} = Qd{2-0};
-  let Inst{7} = Qn{3};
-  let Inst{6} = 0b1;
-  let Inst{5} = Qm{3};
-  let Inst{3-1} = Qm{2-0};
+  // zext_inreg 16 -> 32
+  def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
+            (MVE_VMOVLu16bh MQPR:$src)>;
+  // zext_inreg 8 -> 16
+  def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
+            (MVE_VMOVLu8bh MQPR:$src)>;
 }
 
-class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_int<"vmul", suffix, size, pattern> {
 
-  let Inst{28} = 0b0;
-  let Inst{25-23} = 0b110;
-  let Inst{16} = 0b0;
-  let Inst{12-8} = 0b01001;
-  let Inst{4} = 0b1;
+class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
+                    dag immops, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
+  let Inst{28} = U;
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b1;
+  let Inst{12} = th;
+  let Inst{11-6} = 0b111101;
+  let Inst{4} = 0b0;
   let Inst{0} = 0b0;
 }
 
-def MVE_VMULt1i8  : MVE_VMULt1<"i8", 0b00>;
-def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>;
-def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>;
+// The immediate VSHLL instructions accept shift counts from 1 up to
+// the lane width (8 or 16), but the full-width shifts have an
+// entirely separate encoding, given below with 'lw' in the name.
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+class MVE_VSHLL_imm8<string iname, string suffix,
+                     bit U, bit th, list<dag> pattern=[]>
+  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
+  bits<3> imm;
+  let Inst{20-19} = 0b01;
+  let Inst{18-16} = imm;
 }
 
-class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
-                  list<dag> pattern=[]>
-  : MVE_int<iname, suffix, size, pattern> {
+class MVE_VSHLL_imm16<string iname, string suffix,
+                      bit U, bit th, list<dag> pattern=[]>
+  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
+  bits<4> imm;
+  let Inst{20} = 0b1;
+  let Inst{19-16} = imm;
+}
 
-  let Inst{28} = rounding;
-  let Inst{25-23} = 0b110;
-  let Inst{16} = 0b0;
-  let Inst{12-8} = 0b01011;
+def MVE_VSHLL_imms8bh  : MVE_VSHLL_imm8 <"vshllb", "s8", 0b0, 0b0>;
+def MVE_VSHLL_imms8th  : MVE_VSHLL_imm8 <"vshllt", "s8", 0b0, 0b1>;
+def MVE_VSHLL_immu8bh  : MVE_VSHLL_imm8 <"vshllb", "u8", 0b1, 0b0>;
+def MVE_VSHLL_immu8th  : MVE_VSHLL_imm8 <"vshllt", "u8", 0b1, 0b1>;
+def MVE_VSHLL_imms16bh : MVE_VSHLL_imm16<"vshllb", "s16", 0b0, 0b0>;
+def MVE_VSHLL_imms16th : MVE_VSHLL_imm16<"vshllt", "s16", 0b0, 0b1>;
+def MVE_VSHLL_immu16bh : MVE_VSHLL_imm16<"vshllb", "u16", 0b1, 0b0>;
+def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
+
+class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
+                              bit U, string ops, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
+                  iname, suffix, ops, vpred_r, "", pattern> {
+  let Inst{28} = U;
+  let Inst{25-23} = 0b100;
+  let Inst{21-20} = 0b11;
+  let Inst{19-18} = size{1-0};
+  let Inst{17-16} = 0b01;
+  let Inst{11-6} = 0b111000;
   let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
+  let Inst{0} = 0b1;
 }
 
-class MVE_VQDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_VQxDMULH<"vqdmulh", suffix, size, 0b0, pattern>;
-class MVE_VQRDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_VQxDMULH<"vqrdmulh", suffix, size, 0b1, pattern>;
-
-def MVE_VQDMULHi8   : MVE_VQDMULH<"s8",  0b00>;
-def MVE_VQDMULHi16  : MVE_VQDMULH<"s16", 0b01>;
-def MVE_VQDMULHi32  : MVE_VQDMULH<"s32", 0b10>;
+multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
+                              string ops, list<dag> pattern=[]> {
+  def bh : MVE_VSHLL_by_lane_width<iname#"b", suffix, sz, U, ops, pattern> {
+    let Inst{12} = 0b0;
+  }
+  def th : MVE_VSHLL_by_lane_width<iname#"t", suffix, sz, U, ops, pattern> {
+    let Inst{12} = 0b1;
+  }
+}
 
-def MVE_VQRDMULHi8  : MVE_VQRDMULH<"s8",  0b00>;
-def MVE_VQRDMULHi16 : MVE_VQRDMULH<"s16", 0b01>;
-def MVE_VQRDMULHi32 : MVE_VQRDMULH<"s32", 0b10>;
+defm MVE_VSHLL_lws8  : MVE_VSHLL_lw<"vshll", "s8",  0b00, 0b0, "$Qd, $Qm, #8">;
+defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
+defm MVE_VSHLL_lwu8  : MVE_VSHLL_lw<"vshll", "u8",  0b00, 0b1, "$Qd, $Qm, #8">;
+defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
 
-class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
-                    list<dag> pattern=[]>
-  : MVE_int<iname, suffix, size, pattern> {
+class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
+               dag immops, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+                  pattern> {
+  bits<5> imm;
 
-  let Inst{28} = subtract;
-  let Inst{25-23} = 0b110;
-  let Inst{16} = 0b0;
-  let Inst{12-8} = 0b01000;
+  let Inst{28} = bit_28;
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b0;
+  let Inst{20-16} = imm{4-0};
+  let Inst{12} = bit_12;
+  let Inst{11-6} = 0b111111;
   let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
+  let Inst{0} = 0b1;
 }
 
-class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_VADDSUB<"vadd", suffix, size, 0b0, pattern>;
-class MVE_VSUB<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_VADDSUB<"vsub", suffix, size, 0b1, pattern>;
+def MVE_VRSHRNi16bh : MVE_VxSHRN<
+    "vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VRSHRNi16th : MVE_VxSHRN<
+    "vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VRSHRNi32bh : MVE_VxSHRN<
+    "vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+def MVE_VRSHRNi32th : MVE_VxSHRN<
+    "vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
 
-def MVE_VADDi8  : MVE_VADD<"i8",  0b00>;
-def MVE_VADDi16 : MVE_VADD<"i16", 0b01>;
-def MVE_VADDi32 : MVE_VADD<"i32", 0b10>;
+def MVE_VSHRNi16bh : MVE_VxSHRN<
+    "vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VSHRNi16th : MVE_VxSHRN<
+    "vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VSHRNi32bh : MVE_VxSHRN<
+    "vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+def MVE_VSHRNi32th : MVE_VxSHRN<
+    "vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
+                 list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+                  pattern> {
+  bits<5> imm;
+
+  let Inst{28} = bit_28;
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b0;
+  let Inst{20-16} = imm{4-0};
+  let Inst{12} = bit_12;
+  let Inst{11-6} = 0b111111;
+  let Inst{4} = 0b0;
+  let Inst{0} = 0b0;
 }
 
-def MVE_VSUBi8  : MVE_VSUB<"i8",  0b00>;
-def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>;
-def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>;
+def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
+    "vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
+    "vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
+    "vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
+    "vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
+    "vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
 }
+def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
+    "vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+  let Inst{20-19} = 0b01;
+}
+def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
+    "vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
+    "vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+  let Inst{20} = 0b1;
+}
+
+class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
+                   dag immops, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
+                  pattern> {
+  bits<5> imm;
 
-class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
-                   bits<2> size, list<dag> pattern=[]>
-  : MVE_int<iname, suffix, size, pattern> {
+  let Inst{25-23} = 0b101;
+  let Inst{21} = 0b0;
+  let Inst{20-16} = imm{4-0};
+  let Inst{12} = bit_12;
+  let Inst{11-6} = 0b111101;
+  let Inst{4} = 0b0;
+  let Inst{0} = bit_0;
+}
 
-  let Inst{28} = U;
-  let Inst{25-23} = 0b110;
-  let Inst{16} = 0b0;
-  let Inst{12-10} = 0b000;
-  let Inst{9} = subtract;
-  let Inst{8} = 0b0;
-  let Inst{4} = 0b1;
-  let Inst{0} = 0b0;
+multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
+  def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+    let Inst{28} = 0b0;
+    let Inst{20-19} = 0b01;
+  }
+  def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+    let Inst{28} = 0b1;
+    let Inst{20-19} = 0b01;
+  }
+  def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+    let Inst{28} = 0b0;
+    let Inst{20} = 0b1;
+  }
+  def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+    let Inst{28} = 0b1;
+    let Inst{20} = 0b1;
+  }
 }
 
-class MVE_VQADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
-  : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>;
-class MVE_VQSUB<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
-  : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>;
+defm MVE_VQRSHRNbh : MVE_VxQRSHRN_types<"vqrshrnb", 0b1, 0b0>;
+defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
+defm MVE_VQSHRNbh  : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
+defm MVE_VQSHRNth  : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
 
-def MVE_VQADDs8  : MVE_VQADD<"s8",  0b0, 0b00>;
-def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>;
-def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>;
-def MVE_VQADDu8  : MVE_VQADD<"u8",  0b1, 0b00>;
-def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>;
-def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>;
+// end of mve_imm_shift instructions
 
-def MVE_VQSUBs8  : MVE_VQSUB<"s8",  0b0, 0b00>;
-def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>;
-def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>;
-def MVE_VQSUBu8  : MVE_VQSUB<"u8",  0b1, 0b00>;
-def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>;
-def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>;
+// start of mve_shift instructions
 
-class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
-  : MVE_int<"vabd", suffix, size, pattern> {
+class MVE_shift_by_vec<string iname, string suffix, bit U,
+                       bits<2> size, bit bit_4, bit bit_8>
+  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
+           iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
+  // Shift instructions which take a vector of shift counts
+  bits<4> Qd;
+  bits<4> Qm;
+  bits<4> Qn;
 
   let Inst{28} = U;
-  let Inst{25-23} = 0b110;
+  let Inst{25-24} = 0b11;
+  let Inst{23} = 0b0;
+  let Inst{22} = Qd{3};
+  let Inst{21-20} = size;
+  let Inst{19-17} = Qn{2-0};
   let Inst{16} = 0b0;
-  let Inst{12-8} = 0b00111;
-  let Inst{4} = 0b0;
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12-9} = 0b0010;
+  let Inst{8} = bit_8;
+  let Inst{7} = Qn{3};
+  let Inst{6} = 0b1;
+  let Inst{5} = Qm{3};
+  let Inst{4} = bit_4;
+  let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
 }
 
-def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
-
-class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
-  : MVE_int<"vrhadd", suffix, size, pattern> {
-
-  let Inst{28} = U;
-  let Inst{25-23} = 0b110;
-  let Inst{16} = 0b0;
-  let Inst{12-8} = 0b00001;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
+multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
+  def s8  : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
+  def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
+  def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
+  def u8  : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
+  def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
+  def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
 }
 
-def MVE_VRHADDs8  : MVE_VRHADD<"s8", 0b0, 0b00>;
-def MVE_VRHADDs16 : MVE_VRHADD<"s16", 0b0, 0b01>;
-def MVE_VRHADDs32 : MVE_VRHADD<"s32", 0b0, 0b10>;
-def MVE_VRHADDu8  : MVE_VRHADD<"u8", 0b1, 0b00>;
-def MVE_VRHADDu16 : MVE_VRHADD<"u16", 0b1, 0b01>;
-def MVE_VRHADDu32 : MVE_VRHADD<"u32", 0b1, 0b10>;
+defm MVE_VSHL_by_vec   : mve_shift_by_vec_multi<"vshl",   0b0, 0b0>;
+defm MVE_VQSHL_by_vec  : mve_shift_by_vec_multi<"vqshl",  0b1, 0b0>;
+defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
+defm MVE_VRSHL_by_vec  : mve_shift_by_vec_multi<"vrshl",  0b0, 0b1>;
 
-class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
-                   bits<2> size, list<dag> pattern=[]>
-  : MVE_int<iname, suffix, size, pattern> {
+class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
+                         string ops, vpred_ops vpred, string cstr,
+                         list<dag> pattern=[]>
+  : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+  bits<4> Qd;
+  bits<4> Qm;
 
-  let Inst{28} = U;
-  let Inst{25-23} = 0b110;
-  let Inst{16} = 0b0;
-  let Inst{12-10} = 0b000;
-  let Inst{9} = subtract;
-  let Inst{8} = 0b0;
-  let Inst{4} = 0b0;
+  let Inst{23} = 0b1;
+  let Inst{22} = Qd{3};
+  let Inst{15-13} = Qd{2-0};
+  let Inst{12-11} = 0b00;
+  let Inst{7-6} = 0b01;
+  let Inst{5} = Qm{3};
+  let Inst{4} = 0b1;
+  let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
 }
 
-class MVE_VHADD<string suffix, bit U, bits<2> size,
-              list<dag> pattern=[]>
-  : MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
-class MVE_VHSUB<string suffix, bit U, bits<2> size,
-              list<dag> pattern=[]>
-  : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
-
-def MVE_VHADDs8  : MVE_VHADD<"s8",  0b0, 0b00>;
-def MVE_VHADDs16 : MVE_VHADD<"s16", 0b0, 0b01>;
-def MVE_VHADDs32 : MVE_VHADD<"s32", 0b0, 0b10>;
-def MVE_VHADDu8  : MVE_VHADD<"u8",  0b1, 0b00>;
-def MVE_VHADDu16 : MVE_VHADD<"u16", 0b1, 0b01>;
-def MVE_VHADDu32 : MVE_VHADD<"u32", 0b1, 0b10>;
-
-def MVE_VHSUBs8  : MVE_VHSUB<"s8",  0b0, 0b00>;
-def MVE_VHSUBs16 : MVE_VHSUB<"s16", 0b0, 0b01>;
-def MVE_VHSUBs32 : MVE_VHSUB<"s32", 0b0, 0b10>;
-def MVE_VHSUBu8  : MVE_VHSUB<"u8",  0b1, 0b00>;
-def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
-def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
-
-class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
-  : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
-          "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
-  bits<4> Qd;
-  bits<4> Rt;
+class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
+  : MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
+                       "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
+  bits<6> imm;
+  let Inst{28} = 0b1;
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;
+  let Inst{10-9} = 0b10;
+  let Inst{8} = bit_8;
+}
 
-  let Inst{28} = 0b0;
-  let Inst{25-23} = 0b101;
-  let Inst{22} = B;
-  let Inst{21-20} = 0b10;
-  let Inst{19-17} = Qd{2-0};
-  let Inst{16} = 0b0;
-  let Inst{15-12} = Rt;
-  let Inst{11-8} = 0b1011;
-  let Inst{7} = Qd{3};
-  let Inst{6} = 0b0;
-  let Inst{5} = E;
-  let Inst{4-0} = 0b10000;
+def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
+  let Inst{21-19} = 0b001;
 }
 
-def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
-def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
-def MVE_VDUP8  : MVE_VDUP<"8",  0b1, 0b0>;
+def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
+  let Inst{21-20} = 0b01;
+}
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
-            (MVE_VDUP8  rGPR:$elem)>;
-  def : Pat<(v8i16 (ARMvdup (i32 rGPR:$elem))),
-            (MVE_VDUP16 rGPR:$elem)>;
-  def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
-            (MVE_VDUP32 rGPR:$elem)>;
+def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
+  let Inst{21} = 0b1;
+}
 
-  def : Pat<(v4i32 (ARMvduplane (v4i32 MQPR:$src), imm:$lane)),
-            (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
-  // For the 16-bit and 8-bit vduplanes we don't care about the signedness
-  // of the lane move operation as we only want the lowest 8/16 bits anyway.
-  def : Pat<(v8i16 (ARMvduplane (v8i16 MQPR:$src), imm:$lane)),
-            (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
-  def : Pat<(v16i8 (ARMvduplane (v16i8 MQPR:$src), imm:$lane)),
-            (MVE_VDUP8  (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane))>;
+def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
+  let Inst{21-19} = 0b001;
+}
 
-  def : Pat<(v4f32 (ARMvdup (f32 SPR:$elem))),
-            (v4f32 (MVE_VDUP32 (i32 (COPY_TO_REGCLASS (f32 SPR:$elem), rGPR))))>;
-  def : Pat<(v8f16 (ARMvdup (f16 HPR:$elem))),
-            (v8f16 (MVE_VDUP16 (i32 (COPY_TO_REGCLASS (f16 HPR:$elem), rGPR))))>;
+def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
+  let Inst{21-20} = 0b01;
+}
 
-  def : Pat<(v4f32 (ARMvduplane (v4f32 MQPR:$src), imm:$lane)),
-            (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
-  def : Pat<(v8f16 (ARMvduplane (v8f16 MQPR:$src), imm:$lane)),
-            (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> {
+  let Inst{21} = 0b1;
 }
 
+class MVE_VQSHL_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
 
-class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
-                         list<dag> pattern=[]>
-  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
-          iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
-  bits<4> Qd;
-  bits<4> Qm;
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;
+  let Inst{10-8} = 0b111;
+}
 
-  let Inst{22} = Qd{3};
-  let Inst{19-18} = size{1-0};
-  let Inst{15-13} = Qd{2-0};
-  let Inst{5} = Qm{3};
-  let Inst{3-1} = Qm{2-0};
+def MVE_VSLIimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-19} = 0b001;
 }
 
-class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
-                   bit count_zeroes, list<dag> pattern=[]>
-  : MVEIntSingleSrc<iname, suffix, size, pattern> {
+def MVE_VSLIimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-19} = 0b001;
+}
+
+def MVE_VSLIimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-20} = 0b01;
+}
 
+def MVE_VSLIimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
   let Inst{28} = 0b1;
-  let Inst{25-23} = 0b111;
-  let Inst{21-20} = 0b11;
-  let Inst{17-16} = 0b00;
-  let Inst{12-8} = 0b00100;
-  let Inst{7} = count_zeroes;
-  let Inst{6} = 0b1;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
+  let Inst{21-20} = 0b01;
 }
 
-def MVE_VCLSs8  : MVE_VCLSCLZ<"vcls", "s8",  0b00, 0b0>;
-def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
-def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
+def MVE_VSLIimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21} = 0b1;
+}
 
-def MVE_VCLZs8  : MVE_VCLSCLZ<"vclz", "i8",  0b00, 0b1>;
-def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
-def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+def MVE_VSLIimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21} = 0b1;
+}
 
-class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
-                      list<dag> pattern=[]>
-  : MVEIntSingleSrc<iname, suffix, size, pattern> {
+class MVE_VQSHLU_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
 
   let Inst{28} = 0b1;
-  let Inst{25-23} = 0b111;
-  let Inst{21-20} = 0b11;
-  let Inst{17-16} = 0b01;
-  let Inst{12-8} = 0b00011;
-  let Inst{7} = negate;
-  let Inst{6} = 0b1;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;
+  let Inst{10-8} = 0b110;
 }
 
-def MVE_VABSs8  : MVE_VABSNEG_int<"vabs", "s8",  0b00, 0b0>;
-def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
-def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
+def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
+  let Inst{21-19} = 0b001;
+}
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
-            (v16i8 (MVE_VABSs8 $v))>;
-  def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
-            (v8i16 (MVE_VABSs16 $v))>;
-  def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
-            (v4i32 (MVE_VABSs32 $v))>;
+def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
+  let Inst{21-20} = 0b01;
 }
 
-def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
-def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
-def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
+  let Inst{21} = 0b1;
+}
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
-            (v16i8 (MVE_VNEGs8 $v))>;
-  def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
-            (v8i16 (MVE_VNEGs16 $v))>;
-  def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
-            (v4i32 (MVE_VNEGs32 $v))>;
+class MVE_VRSHR_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
+
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;
+  let Inst{10-8} = 0b010;
 }
 
-class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
-                   bit negate, list<dag> pattern=[]>
-  : MVEIntSingleSrc<iname, suffix, size, pattern> {
+def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-19} = 0b001;
+}
 
+def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
   let Inst{28} = 0b1;
-  let Inst{25-23} = 0b111;
-  let Inst{21-20} = 0b11;
-  let Inst{17-16} = 0b00;
-  let Inst{12-8} = 0b00111;
-  let Inst{7} = negate;
-  let Inst{6} = 0b1;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
+  let Inst{21-19} = 0b001;
 }
 
-def MVE_VQABSs8  : MVE_VQABSNEG<"vqabs", "s8",  0b00, 0b0>;
-def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
-def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
-
-def MVE_VQNEGs8  : MVE_VQABSNEG<"vqneg", "s8",  0b00, 0b1>;
-def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
-def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
+def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-20} = 0b01;
+}
 
-class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
-                  dag iops, list<dag> pattern=[]>
-  : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
-          vpred_r, "", pattern> {
-  bits<13> imm;
-  bits<4> Qd;
+def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-20} = 0b01;
+}
 
-  let Inst{28} = imm{7};
-  let Inst{25-23} = 0b111;
-  let Inst{22} = Qd{3};
-  let Inst{21-19} = 0b000;
-  let Inst{18-16} = imm{6-4};
-  let Inst{15-13} = Qd{2-0};
-  let Inst{12} = 0b0;
-  let Inst{11-8} = cmode{3-0};
-  let Inst{7-6} = 0b01;
-  let Inst{5} = op;
-  let Inst{4} = 0b1;
-  let Inst{3-0} = imm{3-0};
+def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21} = 0b1;
+}
 
-  let DecoderMethod = "DecodeMVEModImmInstruction";
+def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21} = 0b1;
 }
 
-let isReMaterializable = 1 in {
-let isAsCheapAsAMove = 1 in {
-def MVE_VMOVimmi8  : MVE_mod_imm<"vmov", "i8",  {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
-def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
-  let Inst{9} = imm{9};
+class MVE_VSHR_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
+
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;
+  let Inst{10-8} = 0b000;
 }
-def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
-  let Inst{11-8} = imm{11-8};
+
+def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-19} = 0b001;
 }
-def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
-def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
-} // let isAsCheapAsAMove = 1
 
-def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
-  let Inst{9} = imm{9};
+def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-19} = 0b001;
 }
-def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
-  let Inst{11-8} = imm{11-8};
+
+def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21-20} = 0b01;
 }
-} // let isReMaterializable = 1
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (ARMvmovImm timm:$simm)),
-            (v16i8 (MVE_VMOVimmi8  nImmSplatI8:$simm))>;
-  def : Pat<(v8i16 (ARMvmovImm timm:$simm)),
-            (v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
-  def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
-            (v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
+def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21-20} = 0b01;
+}
 
-  def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
-            (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
-  def : Pat<(v4i32 (ARMvmvnImm timm:$simm)),
-            (v4i32 (MVE_VMVNimmi32 nImmVMOVI32:$simm))>;
+def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
+  let Inst{28} = 0b0;
+  let Inst{21} = 0b1;
+}
 
-  def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
-            (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
+def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
+  let Inst{28} = 0b1;
+  let Inst{21} = 0b1;
 }
 
-class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
-                   bit bit_12, list<dag> pattern=[]>
-  : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
-          NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
-          pattern> {
-  bits<4> Qd;
-  bits<4> Qm;
+class MVE_VSHL_imm<string suffix, dag imm>
+  : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
+                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+                       vpred_r, ""> {
+  bits<6> imm;
 
   let Inst{28} = 0b0;
-  let Inst{25-23} = 0b100;
-  let Inst{22} = Qd{3};
-  let Inst{21-20} = 0b11;
-  let Inst{19-18} = size;
-  let Inst{17-16} = 0b11;
-  let Inst{15-13} = Qd{2-0};
-  let Inst{12} = bit_12;
-  let Inst{11-6} = 0b111010;
-  let Inst{5} = Qm{3};
-  let Inst{4} = 0b0;
-  let Inst{3-1} = Qm{2-0};
-  let Inst{0} = 0b1;
+  let Inst{25-24} = 0b11;
+  let Inst{21-16} = imm;
+  let Inst{10-8} = 0b101;
 }
 
-def MVE_VMAXAs8  : MVE_VMINMAXA<"vmaxa", "s8",  0b00, 0b0>;
-def MVE_VMAXAs16 : MVE_VMINMAXA<"vmaxa", "s16", 0b01, 0b0>;
-def MVE_VMAXAs32 : MVE_VMINMAXA<"vmaxa", "s32", 0b10, 0b0>;
+def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
+  let Inst{21-19} = 0b001;
+}
 
-def MVE_VMINAs8  : MVE_VMINMAXA<"vmina", "s8",  0b00, 0b1>;
-def MVE_VMINAs16 : MVE_VMINMAXA<"vmina", "s16", 0b01, 0b1>;
-def MVE_VMINAs32 : MVE_VMINMAXA<"vmina", "s32", 0b10, 0b1>;
+def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
+  let Inst{21-20} = 0b01;
+}
 
-// end of MVE Integer instructions
+def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
+  let Inst{21} = 0b1;
+}
+
+// end of mve_shift instructions
 
 // start of MVE Floating Point instructions
 

From 0bf0b8ff7c7edcad0f79e4c39dddd58bc0d62a72 Mon Sep 17 00:00:00 2001
From: Diana Picus <diana.picus@linaro.org>
Date: Mon, 15 Jul 2019 11:33:41 +0000
Subject: [PATCH 091/451] [libFuzzer] Disable fork.test on AArch64

This crashes sporadically on our AArch64 buildbots. Disable for now.

llvm-svn: 366055
---
 compiler-rt/test/fuzzer/fork.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/fuzzer/fork.test b/compiler-rt/test/fuzzer/fork.test
index bcc9b550e70ed..e0f348b2bff1b 100644
--- a/compiler-rt/test/fuzzer/fork.test
+++ b/compiler-rt/test/fuzzer/fork.test
@@ -1,4 +1,4 @@
-# UNSUPPORTED: darwin, freebsd
+# UNSUPPORTED: darwin, freebsd, aarch64
 BINGO: BINGO
 RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
 RUN: not %run %t-SimpleTest -fork=1 2>&1 | FileCheck %s --check-prefix=BINGO

From 6e89887642f4eca39a8e2339adb32c176aa67ce9 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Mon, 15 Jul 2019 11:35:39 +0000
Subject: [PATCH 092/451] [ARM] MVE Vector Shifts

This adds basic lowering for MVE shifts. There are many shifts in MVE, but the
instructions handled here are:
 VSHL (imm)
 VSHRu (imm)
 VSHRs (imm)
 VSHL (vector)
 VSHL (register)

MVE, like NEON before it, doesn't have shift right by a vector (or register).
We instead have to negate the amount and shift in the opposite direction. This
means we have to convert any SHRs into a form of SHL (that is still signed or
unsigned) with a negated shift amount and select from there. MVE still does
have shifting by an immediate for SHL, ASR and LSR.

This adds lowering for these and for register forms, which work well for left
shifts but may require an extra fold of neg(vdup(x)) -> vdup(neg(x)) to work
optimally for right shifts.

Differential Revision: https://reviews.llvm.org/D64212

llvm-svn: 366056
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp |  13 +-
 llvm/lib/Target/ARM/ARMInstrInfo.td     |  11 +
 llvm/lib/Target/ARM/ARMInstrMVE.td      |  55 ++++
 llvm/lib/Target/ARM/ARMInstrNEON.td     |  98 +++---
 llvm/test/CodeGen/Thumb2/mve-shifts.ll  | 391 ++++++++++++++++++++++++
 5 files changed, 508 insertions(+), 60 deletions(-)
 create mode 100644 llvm/test/CodeGen/Thumb2/mve-shifts.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 5773c3ba04ed3..a67adde262d99 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -250,6 +250,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::SHL, VT, Custom);
+    setOperationAction(ISD::SRA, VT, Custom);
+    setOperationAction(ISD::SRL, VT, Custom);
     setOperationAction(ISD::SMIN, VT, Legal);
     setOperationAction(ISD::SMAX, VT, Legal);
     setOperationAction(ISD::UMIN, VT, Legal);
@@ -5718,10 +5721,11 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   // We essentially have two forms here. Shift by an immediate and shift by a
-  // vector register. We cannot easily match shift by an immediate in tablegen
-  // so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.  For shifting
-  // by a vector, we don't have VSHR, only VSHL (which can be signed or
-  // unsigned, and a negative shift indicates a shift right).
+  // vector register (there are also shift by a gpr, but that is just handled
+  // with a tablegen pattern). We cannot easily match shift by an immediate in
+  // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
+  // For shifting by a vector, we don't have VSHR, only VSHL (which can be
+  // signed or unsigned, and a negative shift indicates a shift right).
   if (N->getOpcode() == ISD::SHL) {
     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
       return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
@@ -12852,7 +12856,6 @@ static SDValue PerformShiftCombine(SDNode *N,
   if (!VT.isVector() || !TLI.isTypeLegal(VT))
     return SDValue();
 
-  assert(ST->hasNEON() && "unexpected vector shift");
   int64_t Cnt;
 
   switch (N->getOpcode()) {
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 3c629fef6825d..e35145463852b 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -254,6 +254,17 @@ def ARMvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
 def ARMvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
 def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
 
+
+def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                        SDTCisVT<2, i32>]>;
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                     SDTCisSameAs<0, 2>,]>;
+def ARMvshlImm   : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
+def ARMvshrsImm  : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
+def ARMvshruImm  : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
+def ARMvshls     : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
+def ARMvshlu     : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
+
 def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop,
                     [SDNPHasChain]>;
 
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 9eec9a6f096ab..1091bcc9d1f02 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2119,6 +2119,22 @@ defm MVE_VQSHL_by_vec  : mve_shift_by_vec_multi<"vqshl",  0b1, 0b0>;
 defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
 defm MVE_VRSHL_by_vec  : mve_shift_by_vec_multi<"vrshl",  0b0, 0b1>;
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
+            (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
+  def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
+            (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
+  def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
+            (v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+
+  def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
+            (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
+  def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
+            (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
+  def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
+            (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+}
+
 class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
                          string ops, vpred_ops vpred, string cstr,
                          list<dag> pattern=[]>
@@ -2344,6 +2360,29 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
   let Inst{21} = 0b1;
 }
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+
+  def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+
+  def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
+            (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
+  def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
+            (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
+  def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
+            (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+}
+
 // end of mve_shift instructions
 
 // start of MVE Floating Point instructions
@@ -3353,6 +3392,22 @@ defm MVE_VRSHL_qr  : MVE_VxSHL_qr_types<"vrshl",  0b0, 0b1>;
 defm MVE_VQSHL_qr  : MVE_VxSHL_qr_types<"vqshl",  0b1, 0b0>;
 defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
 
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
+            (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
+  def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
+            (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
+  def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
+            (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+
+  def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
+            (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
+  def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
+            (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
+  def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
+            (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+}
+
 class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
   : MVE_qDest_rSrc<iname, suffix, pattern> {
 
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 64d949f79e010..806681df102c1 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -493,26 +493,14 @@ def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
 def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
 def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
 
-// Vector Shifts
-def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
-                                     SDTCisSameAs<0, 2>,]>;
-
-def NEONvshls        : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
-def NEONvshlu        : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
-
 // Types for vector shift by immediates.  The "SHX" version is for long and
 // narrow operations where the source and destination vectors have different
 // types.  The "SHINS" version is for shift and insert operations.
-def SDTARMVSHIMM     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
-                                            SDTCisVT<2, i32>]>;
 def SDTARMVSHXIMM    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                             SDTCisVT<2, i32>]>;
 def SDTARMVSHINSIMM  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                             SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
 
-def NEONvshlImm      : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
-def NEONvshrsImm     : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
-def NEONvshruImm     : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
 def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
 
 def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
@@ -4269,11 +4257,11 @@ defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                             int_arm_neon_vraddhn, 1>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i8  (trunc (NEONvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
           (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
           (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
           (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
 }
 
@@ -5027,11 +5015,11 @@ defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                             int_arm_neon_vrsubhn, 0>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i8  (trunc (NEONvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
           (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
           (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
           (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
 }
 
@@ -5522,7 +5510,7 @@ def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
 
 def abd_shr :
     PatFrag<(ops node:$in1, node:$in2, node:$shift),
-            (NEONvshrsImm (sub (zext node:$in1),
+            (ARMvshrsImm (sub (zext node:$in1),
                             (zext node:$in2)), (i32 $shift))>;
 
 let Predicates = [HasNEON] in {
@@ -5790,56 +5778,56 @@ defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                             "vshl", "u", int_arm_neon_vshiftu>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i8 (NEONvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
           (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v4i16 (NEONvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
           (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v2i32 (NEONvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
           (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v1i64 (NEONvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
           (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v16i8 (NEONvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
           (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v8i16 (NEONvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
           (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v4i32 (NEONvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
           (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v2i64 (NEONvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
           (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
 
-def : Pat<(v8i8 (NEONvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
           (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v4i16 (NEONvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
           (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v2i32 (NEONvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
           (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v1i64 (NEONvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
           (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
-def : Pat<(v16i8 (NEONvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
           (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v8i16 (NEONvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
           (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v4i32 (NEONvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
           (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
-def : Pat<(v2i64 (NEONvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
           (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
 
 }
 
 //   VSHL     : Vector Shift Left (Immediate)
-defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshlImm>;
+defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
 
 //   VSHR     : Vector Shift Right (Immediate)
 defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
-                            NEONvshrsImm>;
+                            ARMvshrsImm>;
 defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
-                            NEONvshruImm>;
+                            ARMvshruImm>;
 
 //   VSHLL    : Vector Shift Left Long
 defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
-  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (sext node:$LHS), node:$RHS)>>;
+  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
 defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
-  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (zext node:$LHS), node:$RHS)>>;
+  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
 
 //   VSHLL    : Vector Shift Left Long (with maximum shift count)
 class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
@@ -5858,37 +5846,37 @@ def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                           v2i64, v2i32, imm32>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i16 (NEONvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
           (VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
           (VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
           (VSHLLi32 DPR:$Rn, 32)>;
 }
 
 //   VSHRN    : Vector Shift Right and Narrow
 defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                            PatFrag<(ops node:$Rn, node:$amt),
-                                   (trunc (NEONvshrsImm node:$Rn, node:$amt))>>;
+                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
 
 let Predicates = [HasNEON] in {
-def : Pat<(v8i8 (trunc (NEONvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
+def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
           (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
-def : Pat<(v4i16 (trunc (NEONvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
+def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
           (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
-def : Pat<(v2i32 (trunc (NEONvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
+def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
           (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
 }
 
@@ -5952,8 +5940,8 @@ defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                            NEONvqrshrnsuImm>;
 
 //   VSRA     : Vector Shift Right and Accumulate
-defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrsImm>;
-defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshruImm>;
+defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
+defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
 //   VRSRA    : Vector Rounding Shift Right and Accumulate
 defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
 defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
diff --git a/llvm/test/CodeGen/Thumb2/mve-shifts.ll b/llvm/test/CodeGen/Thumb2/mve-shifts.ll
new file mode 100644
index 0000000000000..4e6e5ae7f6acc
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-shifts.ll
@@ -0,0 +1,391 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <16 x i8> @shl_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: shl_qq_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl <16 x i8> %src1, %src2
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: shl_qq_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.u16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl <8 x i16> %src1, %src2
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shl_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: shl_qq_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl <4 x i32> %src1, %src2
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: shru_qq_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s8 q1, q1
+; CHECK-NEXT:    vshl.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr <16 x i8> %src1, %src2
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shru_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: shru_qq_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s16 q1, q1
+; CHECK-NEXT:    vshl.u16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr <8 x i16> %src1, %src2
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shru_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: shru_qq_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s32 q1, q1
+; CHECK-NEXT:    vshl.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr <4 x i32> %src1, %src2
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: shrs_qq_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s8 q1, q1
+; CHECK-NEXT:    vshl.s8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = ashr <16 x i8> %src1, %src2
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shrs_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: shrs_qq_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s16 q1, q1
+; CHECK-NEXT:    vshl.s16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = ashr <8 x i16> %src1, %src2
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shrs_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: shrs_qq_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vneg.s32 q1, q1
+; CHECK-NEXT:    vshl.s32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = ashr <4 x i32> %src1, %src2
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) {
+; CHECK-LABEL: shl_qi_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.i8 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_qi_int16_t(<8 x i16> %src1) {
+; CHECK-LABEL: shl_qi_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.i16 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shl_qi_int32_t(<4 x i32> %src1) {
+; CHECK-LABEL: shl_qi_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.i32 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) {
+; CHECK-LABEL: shru_qi_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshr.u8 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shru_qi_int16_t(<8 x i16> %src1) {
+; CHECK-LABEL: shru_qi_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshr.u16 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shru_qi_int32_t(<4 x i32> %src1) {
+; CHECK-LABEL: shru_qi_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshr.u32 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) {
+; CHECK-LABEL: shrs_qi_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshr.s8 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = ashr <16 x i8> %src1, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shrs_qi_int16_t(<8 x i16> %src1) {
+; CHECK-LABEL: shrs_qi_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshr.s16 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = ashr <8 x i16> %src1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shrs_qi_int32_t(<4 x i32> %src1) {
+; CHECK-LABEL: shrs_qi_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshr.s32 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = ashr <4 x i32> %src1, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %0
+}
+
+
+
+define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) {
+; CHECK-LABEL: shl_qr_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.u8 q0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <16 x i8> undef, i8 %src2, i32 0
+  %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
+  %0 = shl <16 x i8> %src1, %s
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_qr_int16_t(<8 x i16> %src1, i16 %src2) {
+; CHECK-LABEL: shl_qr_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.u16 q0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <8 x i16> undef, i16 %src2, i32 0
+  %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
+  %0 = shl <8 x i16> %src1, %s
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shl_qr_int32_t(<4 x i32> %src1, i32 %src2) {
+; CHECK-LABEL: shl_qr_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vshl.u32 q0, r0
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <4 x i32> undef, i32 %src2, i32 0
+  %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
+  %0 = shl <4 x i32> %src1, %s
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) {
+; CHECK-LABEL: shru_qr_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vdup.8 q1, r0
+; CHECK-NEXT:    vneg.s8 q1, q1
+; CHECK-NEXT:    vshl.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <16 x i8> undef, i8 %src2, i32 0
+  %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
+  %0 = lshr <16 x i8> %src1, %s
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shru_qr_int16_t(<8 x i16> %src1, i16 %src2) {
+; CHECK-LABEL: shru_qr_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vdup.16 q1, r0
+; CHECK-NEXT:    vneg.s16 q1, q1
+; CHECK-NEXT:    vshl.u16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <8 x i16> undef, i16 %src2, i32 0
+  %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
+  %0 = lshr <8 x i16> %src1, %s
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shru_qr_int32_t(<4 x i32> %src1, i32 %src2) {
+; CHECK-LABEL: shru_qr_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vdup.32 q1, r0
+; CHECK-NEXT:    vneg.s32 q1, q1
+; CHECK-NEXT:    vshl.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <4 x i32> undef, i32 %src2, i32 0
+  %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
+  %0 = lshr <4 x i32> %src1, %s
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) {
+; CHECK-LABEL: shrs_qr_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vdup.8 q1, r0
+; CHECK-NEXT:    vneg.s8 q1, q1
+; CHECK-NEXT:    vshl.s8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <16 x i8> undef, i8 %src2, i32 0
+  %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
+  %0 = ashr <16 x i8> %src1, %s
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shrs_qr_int16_t(<8 x i16> %src1, i16 %src2) {
+; CHECK-LABEL: shrs_qr_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vdup.16 q1, r0
+; CHECK-NEXT:    vneg.s16 q1, q1
+; CHECK-NEXT:    vshl.s16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <8 x i16> undef, i16 %src2, i32 0
+  %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
+  %0 = ashr <8 x i16> %src1, %s
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shrs_qr_int32_t(<4 x i32> %src1, i32 %src2) {
+; CHECK-LABEL: shrs_qr_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vdup.32 q1, r0
+; CHECK-NEXT:    vneg.s32 q1, q1
+; CHECK-NEXT:    vshl.s32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <4 x i32> undef, i32 %src2, i32 0
+  %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
+  %0 = ashr <4 x i32> %src1, %s
+  ret <4 x i32> %0
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {
+; CHECK-LABEL: shl_qiv_int8_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    adr r0, .LCPI27_0
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vshl.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI27_0:
+; CHECK-NEXT:    .byte 1 @ 0x1
+; CHECK-NEXT:    .byte 2 @ 0x2
+; CHECK-NEXT:    .byte 3 @ 0x3
+; CHECK-NEXT:    .byte 4 @ 0x4
+; CHECK-NEXT:    .byte 1 @ 0x1
+; CHECK-NEXT:    .byte 2 @ 0x2
+; CHECK-NEXT:    .byte 3 @ 0x3
+; CHECK-NEXT:    .byte 4 @ 0x4
+; CHECK-NEXT:    .byte 1 @ 0x1
+; CHECK-NEXT:    .byte 2 @ 0x2
+; CHECK-NEXT:    .byte 3 @ 0x3
+; CHECK-NEXT:    .byte 4 @ 0x4
+; CHECK-NEXT:    .byte 1 @ 0x1
+; CHECK-NEXT:    .byte 2 @ 0x2
+; CHECK-NEXT:    .byte 3 @ 0x3
+; CHECK-NEXT:    .byte 4 @ 0x4
+entry:
+  %0 = shl <16 x i8> %src1, <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) {
+; CHECK-LABEL: shl_qiv_int16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    adr r0, .LCPI28_0
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vshl.u16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI28_0:
+; CHECK-NEXT:    .short 1 @ 0x1
+; CHECK-NEXT:    .short 2 @ 0x2
+; CHECK-NEXT:    .short 3 @ 0x3
+; CHECK-NEXT:    .short 4 @ 0x4
+; CHECK-NEXT:    .short 1 @ 0x1
+; CHECK-NEXT:    .short 2 @ 0x2
+; CHECK-NEXT:    .short 3 @ 0x3
+; CHECK-NEXT:    .short 4 @ 0x4
+entry:
+  %0 = shl <8 x i16> %src1, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) {
+; CHECK-LABEL: shl_qiv_int32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    adr r0, .LCPI29_0
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vshl.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI29_0:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 2 @ 0x2
+; CHECK-NEXT:    .long 3 @ 0x3
+; CHECK-NEXT:    .long 4 @ 0x4
+entry:
+  %0 = shl <4 x i32> %src1, <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %0
+}

From 8d9b9f6bf2cde253ff89369399a45172b10af512 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Mon, 15 Jul 2019 11:47:54 +0000
Subject: [PATCH 093/451] [LLD][ELF] - Minor simplification. NFC.

This removes a call to `object::getSymbol<ELFT>`.
We used this function in the following way: it was given an
array of symbols and an index, and returned either the symbol
at the given index or an error.

This function was removed in D64631.
(rL366052, but was reverted because of LLD compilation error
that I didn't know about).

It does not make much sense to keep this function on the LLVM side
only for LLD, because given only a list of symbols and an index it
is not able to produce a valuable error message about the context anyway.

llvm-svn: 366057
---
 lld/ELF/InputFiles.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 919012fd20ce9..470d877f3fbf3 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -466,9 +466,11 @@ template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
 template <class ELFT>
 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
                                               const Elf_Shdr &sec) {
-  const Elf_Sym *sym =
-      CHECK(object::getSymbol<ELFT>(this->getELFSyms<ELFT>(), sec.sh_info), this);
-  StringRef signature = CHECK(sym->getName(this->stringTable), this);
+  typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
+  if (sec.sh_info >= symbols.size())
+    fatal(toString(this) + ": invalid symbol index");
+  const typename ELFT::Sym &sym = symbols[sec.sh_info];
+  StringRef signature = CHECK(sym.getName(this->stringTable), this);
 
   // As a special case, if a symbol is a section symbol and has no name,
   // we use a section name as a signature.
@@ -477,7 +479,7 @@ StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
   // standard, but GNU gold 1.14 (the newest version as of July 2017) or
   // older produce such sections as outputs for the -r option, so we need
   // a bug-compatibility.
-  if (signature.empty() && sym->getType() == STT_SECTION)
+  if (signature.empty() && sym.getType() == STT_SECTION)
     return getSectionName(sec);
   return signature;
 }

From 224816ba169923f4ac9a1e8ac28ce10b991db239 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Mon, 15 Jul 2019 11:53:39 +0000
Subject: [PATCH 094/451] Recommit r366052 "[obj2yaml] - Rework tool's error
 reporting logic for ELF target."

No changes, LLD code was updated in r366057.

Original commit message:

ELF.h contains two getSymbol methods
which seem to be used only from obj2yaml.

One of these methods calls the other, which in turn
contains an untested error message which doesn't
provide enough information.

The problem is that after improving just that message,
obj2yaml will not show it
("Error reading file: yaml: Invalid data was
encountered while parsing the file" message will be shown instead),
because the tool's internal error handling is based on the ErrorOr<> class, which
stores an error code and as a result can only show a predefined error string, which
isn't very useful.

In this patch, I rework obj2yaml's error reporting system
for ELF targets to use the Error and Expected<> classes.
Also, I improve the error message produced
by getSymbol to demonstrate the new functionality.

Differential revision: https://reviews.llvm.org/D64631

llvm-svn: 366058
---
 llvm/include/llvm/Object/ELF.h                |  23 +-
 llvm/test/tools/obj2yaml/section-group.test   |  24 +-
 .../obj2yaml/special-symbol-indices.yaml      |   2 +-
 llvm/tools/obj2yaml/elf2yaml.cpp              | 319 +++++++++---------
 llvm/tools/obj2yaml/obj2yaml.cpp              |  13 +-
 llvm/tools/obj2yaml/obj2yaml.h                |   2 +-
 6 files changed, 199 insertions(+), 184 deletions(-)

diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 8fe9f2919c5b4..7bc6dc4620c7d 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -354,22 +354,19 @@ ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols,
   return getSection(Index);
 }
 
-template <class ELFT>
-inline Expected<const typename ELFT::Sym *>
-getSymbol(typename ELFT::SymRange Symbols, uint32_t Index) {
-  if (Index >= Symbols.size())
-    // TODO: this error is untested.
-    return createError("invalid symbol index");
-  return &Symbols[Index];
-}
-
 template <class ELFT>
 Expected<const typename ELFT::Sym *>
 ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
-  auto SymtabOrErr = symbols(Sec);
-  if (!SymtabOrErr)
-    return SymtabOrErr.takeError();
-  return object::getSymbol<ELFT>(*SymtabOrErr, Index);
+  auto SymsOrErr = symbols(Sec);
+  if (!SymsOrErr)
+    return SymsOrErr.takeError();
+
+  Elf_Sym_Range Symbols = *SymsOrErr;
+  if (Index >= Symbols.size())
+    return createError("unable to get symbol from section " +
+                       getSecIndexForError(this, Sec) +
+                       ": invalid symbol index (" + Twine(Index) + ")");
+  return &Symbols[Index];
 }
 
 template <class ELFT>
diff --git a/llvm/test/tools/obj2yaml/section-group.test b/llvm/test/tools/obj2yaml/section-group.test
index 78af00cd138b3..cd520cb1b361f 100644
--- a/llvm/test/tools/obj2yaml/section-group.test
+++ b/llvm/test/tools/obj2yaml/section-group.test
@@ -1,6 +1,6 @@
 ## Checks that the tool is able to read section groups from ELF.
 
-# RUN: yaml2obj %s > %t1.o
+# RUN: yaml2obj --docnum=1 %s > %t1.o
 # RUN: llvm-readobj --elf-section-groups %t1.o | FileCheck %s -check-prefix=OBJ
 # RUN: obj2yaml %t1.o | FileCheck %s --check-prefix YAML
 
@@ -46,3 +46,25 @@ Symbols:
   - Name:    signature
     Type:    STT_OBJECT
     Section: .rodata
+
+## Check that obj2yaml reports an error when the sh_info field of a
+## group section contains an invalid (too large) signature symbol index.
+
+# RUN: yaml2obj --docnum=2 %s > %t2.o
+# RUN: not obj2yaml %t2.o 2>&1 | FileCheck %s --check-prefix ERR
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .group
+    Type: SHT_GROUP
+    Link: .symtab
+    Info: 0xFF
+    Members:
+      - SectionOrType: GRP_COMDAT
+
+# ERR: Error reading file: {{.*}}2.o: unable to get symbol from section [index 2]: invalid symbol index (255)
diff --git a/llvm/test/tools/obj2yaml/special-symbol-indices.yaml b/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
index 25550c944f385..fcc2a705f9c75 100644
--- a/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
+++ b/llvm/test/tools/obj2yaml/special-symbol-indices.yaml
@@ -51,4 +51,4 @@ Symbols:
 ## shn_xindex.o contains a symbol with st_shndx == SHN_XINDEX.
 ## We do not support it at this moment.
 # RUN: not obj2yaml %S/Inputs/shn_xindex.o 2>&1 | FileCheck %s --check-prefix=ERR
-# ERR: Error reading file: {{.*}}shn_xindex.o: Feature not yet implemented.
+# ERR: Error reading file: {{.*}}shn_xindex.o: SHN_XINDEX symbols are not supported
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index 7404bae2a08ac..bd27c103403eb 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -44,31 +44,31 @@ class ELFDumper {
   const object::ELFFile<ELFT> &Obj;
   ArrayRef<Elf_Word> ShndxTable;
 
-  std::error_code dumpSymbols(const Elf_Shdr *Symtab,
-                              std::vector<ELFYAML::Symbol> &Symbols);
-  std::error_code dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
-                             StringRef StrTable, ELFYAML::Symbol &S);
-  std::error_code dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S);
-  std::error_code dumpCommonRelocationSection(const Elf_Shdr *Shdr,
-                                              ELFYAML::RelocationSection &S);
+  Error dumpSymbols(const Elf_Shdr *Symtab,
+                    std::vector<ELFYAML::Symbol> &Symbols);
+  Error dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
+                   StringRef StrTable, ELFYAML::Symbol &S);
+  Error dumpCommonSection(const Elf_Shdr *Shdr, ELFYAML::Section &S);
+  Error dumpCommonRelocationSection(const Elf_Shdr *Shdr,
+                                    ELFYAML::RelocationSection &S);
   template <class RelT>
-  std::error_code dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
-                                 ELFYAML::Relocation &R);
-  
-  ErrorOr<ELFYAML::DynamicSection *> dumpDynamicSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::RelocationSection *> dumpRelocSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::RawContentSection *>
+  Error dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
+                       ELFYAML::Relocation &R);
+
+  Expected<ELFYAML::DynamicSection *> dumpDynamicSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::RelocationSection *> dumpRelocSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::RawContentSection *>
   dumpContentSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::NoBitsSection *> dumpNoBitsSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::VerdefSection *> dumpVerdefSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::SymverSection *> dumpSymverSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::VerneedSection *> dumpVerneedSection(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::Group *> dumpGroup(const Elf_Shdr *Shdr);
-  ErrorOr<ELFYAML::MipsABIFlags *> dumpMipsABIFlags(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::NoBitsSection *> dumpNoBitsSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::VerdefSection *> dumpVerdefSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::SymverSection *> dumpSymverSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::VerneedSection *> dumpVerneedSection(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::Group *> dumpGroup(const Elf_Shdr *Shdr);
+  Expected<ELFYAML::MipsABIFlags *> dumpMipsABIFlags(const Elf_Shdr *Shdr);
 
 public:
   ELFDumper(const object::ELFFile<ELFT> &O);
-  ErrorOr<ELFYAML::Object *> dump();
+  Expected<ELFYAML::Object *> dump();
 };
 
 }
@@ -134,7 +134,7 @@ ELFDumper<ELFT>::getUniquedSymbolName(const Elf_Sym *Sym, StringRef StrTable,
   return Name;
 }
 
-template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
+template <class ELFT> Expected<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   auto Y = make_unique<ELFYAML::Object>();
 
   // Dump header. We do not dump SHEntSize, SHOffset, SHNum and SHStrNdx field.
@@ -152,7 +152,7 @@ template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   // Dump sections
   auto SectionsOrErr = Obj.sections();
   if (!SectionsOrErr)
-    return errorToErrorCode(SectionsOrErr.takeError());
+    return SectionsOrErr.takeError();
   Sections = *SectionsOrErr;
   SectionNames.resize(Sections.size());
 
@@ -160,20 +160,20 @@ template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
   // to access the deduplicated symbol names that we also create here.
   for (const Elf_Shdr &Sec : Sections) {
     if (Sec.sh_type == ELF::SHT_SYMTAB)
-      if (auto EC = dumpSymbols(&Sec, Y->Symbols))
-        return EC;
+      if (Error E = dumpSymbols(&Sec, Y->Symbols))
+        return std::move(E);
     if (Sec.sh_type == ELF::SHT_DYNSYM)
-      if (auto EC = dumpSymbols(&Sec, Y->DynamicSymbols))
-        return EC;
+      if (Error E = dumpSymbols(&Sec, Y->DynamicSymbols))
+        return std::move(E);
   }
 
   for (const Elf_Shdr &Sec : Sections) {
     switch (Sec.sh_type) {
     case ELF::SHT_DYNAMIC: {
-      ErrorOr<ELFYAML::DynamicSection *> S = dumpDynamicSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::DynamicSection *> SecOrErr = dumpDynamicSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_NULL:
@@ -185,65 +185,66 @@ template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
     case ELF::SHT_SYMTAB_SHNDX: {
       auto TableOrErr = Obj.getSHNDXTable(Sec);
       if (!TableOrErr)
-        return errorToErrorCode(TableOrErr.takeError());
+        return TableOrErr.takeError();
       ShndxTable = *TableOrErr;
       break;
     }
     case ELF::SHT_REL:
     case ELF::SHT_RELA: {
-      ErrorOr<ELFYAML::RelocationSection *> S = dumpRelocSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::RelocationSection *> SecOrErr = dumpRelocSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_GROUP: {
-      ErrorOr<ELFYAML::Group *> G = dumpGroup(&Sec);
-      if (std::error_code EC = G.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(G.get()));
+      Expected<ELFYAML::Group *> GroupOrErr = dumpGroup(&Sec);
+      if (!GroupOrErr)
+        return GroupOrErr.takeError();
+      Y->Sections.emplace_back(*GroupOrErr);
       break;
     }
     case ELF::SHT_MIPS_ABIFLAGS: {
-      ErrorOr<ELFYAML::MipsABIFlags *> G = dumpMipsABIFlags(&Sec);
-      if (std::error_code EC = G.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(G.get()));
+      Expected<ELFYAML::MipsABIFlags *> SecOrErr = dumpMipsABIFlags(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_NOBITS: {
-      ErrorOr<ELFYAML::NoBitsSection *> S = dumpNoBitsSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::NoBitsSection *> SecOrErr = dumpNoBitsSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_GNU_verdef: {
-      ErrorOr<ELFYAML::VerdefSection *> S = dumpVerdefSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::VerdefSection *> SecOrErr = dumpVerdefSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_GNU_versym: {
-      ErrorOr<ELFYAML::SymverSection *> S = dumpSymverSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::SymverSection *> SecOrErr = dumpSymverSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     case ELF::SHT_GNU_verneed: {
-      ErrorOr<ELFYAML::VerneedSection *> S = dumpVerneedSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::VerneedSection *> SecOrErr = dumpVerneedSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
       break;
     }
     default: {
-      ErrorOr<ELFYAML::RawContentSection *> S = dumpContentSection(&Sec);
-      if (std::error_code EC = S.getError())
-        return EC;
-      Y->Sections.push_back(std::unique_ptr<ELFYAML::Section>(S.get()));
+      Expected<ELFYAML::RawContentSection *> SecOrErr =
+          dumpContentSection(&Sec);
+      if (!SecOrErr)
+        return SecOrErr.takeError();
+      Y->Sections.emplace_back(*SecOrErr);
     }
     }
   }
@@ -252,20 +253,19 @@ template <class ELFT> ErrorOr<ELFYAML::Object *> ELFDumper<ELFT>::dump() {
 }
 
 template <class ELFT>
-std::error_code
-ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
+Error ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
                              std::vector<ELFYAML::Symbol> &Symbols) {
   if (!Symtab)
-    return std::error_code();
+    return Error::success();
 
   auto StrTableOrErr = Obj.getStringTableForSymtab(*Symtab);
   if (!StrTableOrErr)
-    return errorToErrorCode(StrTableOrErr.takeError());
+    return StrTableOrErr.takeError();
   StringRef StrTable = *StrTableOrErr;
 
   auto SymtabOrErr = Obj.symbols(Symtab);
   if (!SymtabOrErr)
-    return errorToErrorCode(SymtabOrErr.takeError());
+    return SymtabOrErr.takeError();
 
   if (Symtab->sh_type == ELF::SHT_SYMTAB) {
     SymTable = *SymtabOrErr;
@@ -279,13 +279,12 @@ ELFDumper<ELFT>::dumpSymbols(const Elf_Shdr *Symtab,
     Symbols.push_back(S);
   }
 
-  return std::error_code();
+  return Error::success();
 }
 
 template <class ELFT>
-std::error_code
-ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
-                            StringRef StrTable, ELFYAML::Symbol &S) {
+Error ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
+                                  StringRef StrTable, ELFYAML::Symbol &S) {
   S.Type = Sym->getType();
   S.Value = Sym->st_value;
   S.Size = Sym->st_size;
@@ -295,56 +294,56 @@ ELFDumper<ELFT>::dumpSymbol(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
   Expected<StringRef> SymbolNameOrErr =
       getUniquedSymbolName(Sym, StrTable, SymTab);
   if (!SymbolNameOrErr)
-    return errorToErrorCode(SymbolNameOrErr.takeError());
+    return SymbolNameOrErr.takeError();
   S.Name = SymbolNameOrErr.get();
 
   if (Sym->st_shndx >= ELF::SHN_LORESERVE) {
     if (Sym->st_shndx == ELF::SHN_XINDEX)
-      return obj2yaml_error::not_implemented;
+      return createStringError(obj2yaml_error::not_implemented,
+                               "SHN_XINDEX symbols are not supported");
     S.Index = (ELFYAML::ELF_SHN)Sym->st_shndx;
-    return obj2yaml_error::success;
+    return Error::success();
   }
 
   auto ShdrOrErr = Obj.getSection(Sym, SymTab, ShndxTable);
   if (!ShdrOrErr)
-    return errorToErrorCode(ShdrOrErr.takeError());
+    return ShdrOrErr.takeError();
   const Elf_Shdr *Shdr = *ShdrOrErr;
   if (!Shdr)
-    return obj2yaml_error::success;
+    return Error::success();
 
   auto NameOrErr = getUniquedSectionName(Shdr);
   if (!NameOrErr)
-    return errorToErrorCode(NameOrErr.takeError());
+    return NameOrErr.takeError();
   S.Section = NameOrErr.get();
 
-  return obj2yaml_error::success;
+  return Error::success();
 }
 
 template <class ELFT>
 template <class RelT>
-std::error_code ELFDumper<ELFT>::dumpRelocation(const RelT *Rel,
-                                                const Elf_Shdr *SymTab,
-                                                ELFYAML::Relocation &R) {
+Error ELFDumper<ELFT>::dumpRelocation(const RelT *Rel, const Elf_Shdr *SymTab,
+                                      ELFYAML::Relocation &R) {
   R.Type = Rel->getType(Obj.isMips64EL());
   R.Offset = Rel->r_offset;
   R.Addend = 0;
 
   auto SymOrErr = Obj.getRelocationSymbol(Rel, SymTab);
   if (!SymOrErr)
-    return errorToErrorCode(SymOrErr.takeError());
+    return SymOrErr.takeError();
   const Elf_Sym *Sym = *SymOrErr;
   auto StrTabSec = Obj.getSection(SymTab->sh_link);
   if (!StrTabSec)
-    return errorToErrorCode(StrTabSec.takeError());
+    return StrTabSec.takeError();
   auto StrTabOrErr = Obj.getStringTable(*StrTabSec);
   if (!StrTabOrErr)
-    return errorToErrorCode(StrTabOrErr.takeError());
+    return StrTabOrErr.takeError();
   StringRef StrTab = *StrTabOrErr;
 
   if (Sym) {
     Expected<StringRef> NameOrErr = getUniquedSymbolName(Sym, StrTab, SymTab);
     if (!NameOrErr)
-      return errorToErrorCode(NameOrErr.takeError());
+      return NameOrErr.takeError();
     R.Symbol = NameOrErr.get();
   } else {
     // We have some edge cases of relocations without a symbol associated,
@@ -354,12 +353,12 @@ std::error_code ELFDumper<ELFT>::dumpRelocation(const RelT *Rel,
     R.Symbol = "";
   }
 
-  return obj2yaml_error::success;
+  return Error::success();
 }
 
 template <class ELFT>
-std::error_code ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
-                                                   ELFYAML::Section &S) {
+Error ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
+                                         ELFYAML::Section &S) {
   // Dump fields. We do not dump the ShOffset field. When not explicitly
   // set, the value is set by yaml2obj automatically.
   S.Type = Shdr->sh_type;
@@ -372,51 +371,50 @@ std::error_code ELFDumper<ELFT>::dumpCommonSection(const Elf_Shdr *Shdr,
 
   auto NameOrErr = getUniquedSectionName(Shdr);
   if (!NameOrErr)
-    return errorToErrorCode(NameOrErr.takeError());
+    return NameOrErr.takeError();
   S.Name = NameOrErr.get();
 
   if (Shdr->sh_link != ELF::SHN_UNDEF) {
     auto LinkSection = Obj.getSection(Shdr->sh_link);
     if (LinkSection.takeError())
-      return errorToErrorCode(LinkSection.takeError());
+      return LinkSection.takeError();
     NameOrErr = getUniquedSectionName(*LinkSection);
     if (!NameOrErr)
-      return errorToErrorCode(NameOrErr.takeError());
+      return NameOrErr.takeError();
     S.Link = NameOrErr.get();
   }
 
-  return obj2yaml_error::success;
+  return Error::success();
 }
 
 template <class ELFT>
-std::error_code
-ELFDumper<ELFT>::dumpCommonRelocationSection(const Elf_Shdr *Shdr,
-                                             ELFYAML::RelocationSection &S) {
-  if (std::error_code EC = dumpCommonSection(Shdr, S))
-    return EC;
+Error ELFDumper<ELFT>::dumpCommonRelocationSection(
+    const Elf_Shdr *Shdr, ELFYAML::RelocationSection &S) {
+  if (Error E = dumpCommonSection(Shdr, S))
+    return E;
 
   auto InfoSection = Obj.getSection(Shdr->sh_info);
   if (!InfoSection)
-    return errorToErrorCode(InfoSection.takeError());
+    return InfoSection.takeError();
 
   auto NameOrErr = getUniquedSectionName(*InfoSection);
   if (!NameOrErr)
-    return errorToErrorCode(NameOrErr.takeError());
+    return NameOrErr.takeError();
   S.RelocatableSec = NameOrErr.get();
 
-  return obj2yaml_error::success;
+  return Error::success();
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::DynamicSection *>
+Expected<ELFYAML::DynamicSection *>
 ELFDumper<ELFT>::dumpDynamicSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::DynamicSection>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto DynTagsOrErr = Obj.template getSectionContentsAsArray<Elf_Dyn>(Shdr);
   if (!DynTagsOrErr)
-    return errorToErrorCode(DynTagsOrErr.takeError());
+    return DynTagsOrErr.takeError();
 
   for (const Elf_Dyn &Dyn : *DynTagsOrErr)
     S->Entries.push_back({(ELFYAML::ELF_DYNTAG)Dyn.getTag(), Dyn.getVal()});
@@ -425,35 +423,35 @@ ELFDumper<ELFT>::dumpDynamicSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::RelocationSection *>
+Expected<ELFYAML::RelocationSection *>
 ELFDumper<ELFT>::dumpRelocSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::RelocationSection>();
-  if (std::error_code EC = dumpCommonRelocationSection(Shdr, *S))
-    return EC;
+  if (auto E = dumpCommonRelocationSection(Shdr, *S))
+    return std::move(E);
 
   auto SymTabOrErr = Obj.getSection(Shdr->sh_link);
   if (!SymTabOrErr)
-    return errorToErrorCode(SymTabOrErr.takeError());
+    return SymTabOrErr.takeError();
   const Elf_Shdr *SymTab = *SymTabOrErr;
 
   if (Shdr->sh_type == ELF::SHT_REL) {
     auto Rels = Obj.rels(Shdr);
     if (!Rels)
-      return errorToErrorCode(Rels.takeError());
+      return Rels.takeError();
     for (const Elf_Rel &Rel : *Rels) {
       ELFYAML::Relocation R;
-      if (std::error_code EC = dumpRelocation(&Rel, SymTab, R))
-        return EC;
+      if (Error E = dumpRelocation(&Rel, SymTab, R))
+        return std::move(E);
       S->Relocations.push_back(R);
     }
   } else {
     auto Rels = Obj.relas(Shdr);
     if (!Rels)
-      return errorToErrorCode(Rels.takeError());
+      return Rels.takeError();
     for (const Elf_Rela &Rel : *Rels) {
       ELFYAML::Relocation R;
-      if (std::error_code EC = dumpRelocation(&Rel, SymTab, R))
-        return EC;
+      if (Error E = dumpRelocation(&Rel, SymTab, R))
+        return std::move(E);
       R.Addend = Rel.r_addend;
       S->Relocations.push_back(R);
     }
@@ -463,16 +461,15 @@ ELFDumper<ELFT>::dumpRelocSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::RawContentSection *>
+Expected<ELFYAML::RawContentSection *>
 ELFDumper<ELFT>::dumpContentSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::RawContentSection>();
-
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto ContentOrErr = Obj.getSectionContents(Shdr);
   if (!ContentOrErr)
-    return errorToErrorCode(ContentOrErr.takeError());
+    return ContentOrErr.takeError();
   ArrayRef<uint8_t> Content = *ContentOrErr;
   if (!Content.empty())
     S->Content = yaml::BinaryRef(Content);
@@ -482,40 +479,39 @@ ELFDumper<ELFT>::dumpContentSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::NoBitsSection *>
+Expected<ELFYAML::NoBitsSection *>
 ELFDumper<ELFT>::dumpNoBitsSection(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::NoBitsSection>();
-
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
   S->Size = Shdr->sh_size;
 
   return S.release();
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::VerdefSection *>
+Expected<ELFYAML::VerdefSection *>
 ELFDumper<ELFT>::dumpVerdefSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Verdef Elf_Verdef;
   typedef typename ELFT::Verdaux Elf_Verdaux;
 
   auto S = make_unique<ELFYAML::VerdefSection>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   S->Info = Shdr->sh_info;
 
   auto StringTableShdrOrErr = Obj.getSection(Shdr->sh_link);
   if (!StringTableShdrOrErr)
-    return errorToErrorCode(StringTableShdrOrErr.takeError());
+    return StringTableShdrOrErr.takeError();
 
   auto StringTableOrErr = Obj.getStringTable(*StringTableShdrOrErr);
   if (!StringTableOrErr)
-    return errorToErrorCode(StringTableOrErr.takeError());
+    return StringTableOrErr.takeError();
 
   auto Contents = Obj.getSectionContents(Shdr);
   if (!Contents)
-    return errorToErrorCode(Contents.takeError());
+    return Contents.takeError();
 
   llvm::ArrayRef<uint8_t> Data = *Contents;
   const uint8_t *Buf = Data.data();
@@ -544,17 +540,17 @@ ELFDumper<ELFT>::dumpVerdefSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::SymverSection *>
+Expected<ELFYAML::SymverSection *>
 ELFDumper<ELFT>::dumpSymverSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Half Elf_Half;
 
   auto S = make_unique<ELFYAML::SymverSection>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto VersionsOrErr = Obj.template getSectionContentsAsArray<Elf_Half>(Shdr);
   if (!VersionsOrErr)
-    return errorToErrorCode(VersionsOrErr.takeError());
+    return VersionsOrErr.takeError();
   for (const Elf_Half &E : *VersionsOrErr)
     S->Entries.push_back(E);
 
@@ -562,28 +558,28 @@ ELFDumper<ELFT>::dumpSymverSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::VerneedSection *>
+Expected<ELFYAML::VerneedSection *>
 ELFDumper<ELFT>::dumpVerneedSection(const Elf_Shdr *Shdr) {
   typedef typename ELFT::Verneed Elf_Verneed;
   typedef typename ELFT::Vernaux Elf_Vernaux;
 
   auto S = make_unique<ELFYAML::VerneedSection>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   S->Info = Shdr->sh_info;
 
   auto Contents = Obj.getSectionContents(Shdr);
   if (!Contents)
-    return errorToErrorCode(Contents.takeError());
+    return Contents.takeError();
 
   auto StringTableShdrOrErr = Obj.getSection(Shdr->sh_link);
   if (!StringTableShdrOrErr)
-    return errorToErrorCode(StringTableShdrOrErr.takeError());
+    return StringTableShdrOrErr.takeError();
 
   auto StringTableOrErr = Obj.getStringTable(*StringTableShdrOrErr);
   if (!StringTableOrErr)
-    return errorToErrorCode(StringTableOrErr.takeError());
+    return StringTableOrErr.takeError();
 
   llvm::ArrayRef<uint8_t> Data = *Contents;
   const uint8_t *Buf = Data.data();
@@ -619,32 +615,32 @@ ELFDumper<ELFT>::dumpVerneedSection(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
+Expected<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
   auto S = make_unique<ELFYAML::Group>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto SymtabOrErr = Obj.getSection(Shdr->sh_link);
   if (!SymtabOrErr)
-    return errorToErrorCode(SymtabOrErr.takeError());
+    return SymtabOrErr.takeError();
   // Get symbol with index sh_info which name is the signature of the group.
   const Elf_Shdr *Symtab = *SymtabOrErr;
   auto SymOrErr = Obj.getSymbol(Symtab, Shdr->sh_info);
   if (!SymOrErr)
-    return errorToErrorCode(SymOrErr.takeError());
+    return SymOrErr.takeError();
   auto StrTabOrErr = Obj.getStringTableForSymtab(*Symtab);
   if (!StrTabOrErr)
-    return errorToErrorCode(StrTabOrErr.takeError());
+    return StrTabOrErr.takeError();
 
   Expected<StringRef> SymbolName =
       getUniquedSymbolName(*SymOrErr, *StrTabOrErr, Symtab);
   if (!SymbolName)
-    return errorToErrorCode(SymbolName.takeError());
+    return SymbolName.takeError();
   S->Signature = *SymbolName;
 
   auto MembersOrErr = Obj.template getSectionContentsAsArray<Elf_Word>(Shdr);
   if (!MembersOrErr)
-    return errorToErrorCode(MembersOrErr.takeError());
+    return MembersOrErr.takeError();
 
   for (Elf_Word Member : *MembersOrErr) {
     if (Member == llvm::ELF::GRP_COMDAT) {
@@ -654,27 +650,27 @@ ErrorOr<ELFYAML::Group *> ELFDumper<ELFT>::dumpGroup(const Elf_Shdr *Shdr) {
 
     auto SHdrOrErr = Obj.getSection(Member);
     if (!SHdrOrErr)
-      return errorToErrorCode(SHdrOrErr.takeError());
+      return SHdrOrErr.takeError();
     auto NameOrErr = getUniquedSectionName(*SHdrOrErr);
     if (!NameOrErr)
-      return errorToErrorCode(NameOrErr.takeError());
+      return NameOrErr.takeError();
     S->Members.push_back({*NameOrErr});
   }
   return S.release();
 }
 
 template <class ELFT>
-ErrorOr<ELFYAML::MipsABIFlags *>
+Expected<ELFYAML::MipsABIFlags *>
 ELFDumper<ELFT>::dumpMipsABIFlags(const Elf_Shdr *Shdr) {
   assert(Shdr->sh_type == ELF::SHT_MIPS_ABIFLAGS &&
          "Section type is not SHT_MIPS_ABIFLAGS");
   auto S = make_unique<ELFYAML::MipsABIFlags>();
-  if (std::error_code EC = dumpCommonSection(Shdr, *S))
-    return EC;
+  if (Error E = dumpCommonSection(Shdr, *S))
+    return std::move(E);
 
   auto ContentOrErr = Obj.getSectionContents(Shdr);
   if (!ContentOrErr)
-    return errorToErrorCode(ContentOrErr.takeError());
+    return ContentOrErr.takeError();
 
   auto *Flags = reinterpret_cast<const object::Elf_Mips_ABIFlags<ELFT> *>(
       ContentOrErr.get().data());
@@ -693,21 +689,20 @@ ELFDumper<ELFT>::dumpMipsABIFlags(const Elf_Shdr *Shdr) {
 }
 
 template <class ELFT>
-static std::error_code elf2yaml(raw_ostream &Out,
-                                const object::ELFFile<ELFT> &Obj) {
+static Error elf2yaml(raw_ostream &Out, const object::ELFFile<ELFT> &Obj) {
   ELFDumper<ELFT> Dumper(Obj);
-  ErrorOr<ELFYAML::Object *> YAMLOrErr = Dumper.dump();
-  if (std::error_code EC = YAMLOrErr.getError())
-    return EC;
+  Expected<ELFYAML::Object *> YAMLOrErr = Dumper.dump();
+  if (!YAMLOrErr)
+    return YAMLOrErr.takeError();
 
   std::unique_ptr<ELFYAML::Object> YAML(YAMLOrErr.get());
   yaml::Output Yout(Out);
   Yout << *YAML;
 
-  return std::error_code();
+  return Error::success();
 }
 
-std::error_code elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
+Error elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
   if (const auto *ELFObj = dyn_cast<object::ELF32LEObjectFile>(&Obj))
     return elf2yaml(Out, *ELFObj->getELFFile());
 
@@ -720,5 +715,5 @@ std::error_code elf2yaml(raw_ostream &Out, const object::ObjectFile &Obj) {
   if (const auto *ELFObj = dyn_cast<object::ELF64BEObjectFile>(&Obj))
     return elf2yaml(Out, *ELFObj->getELFFile());
 
-  return obj2yaml_error::unsupported_obj_file_format;
+  llvm_unreachable("unknown ELF file format");
 }
diff --git a/llvm/tools/obj2yaml/obj2yaml.cpp b/llvm/tools/obj2yaml/obj2yaml.cpp
index 8622e38319b6c..f03b1ef4bade3 100644
--- a/llvm/tools/obj2yaml/obj2yaml.cpp
+++ b/llvm/tools/obj2yaml/obj2yaml.cpp
@@ -17,19 +17,20 @@
 using namespace llvm;
 using namespace llvm::object;
 
-static std::error_code dumpObject(const ObjectFile &Obj) {
+static Error dumpObject(const ObjectFile &Obj) {
   if (Obj.isCOFF())
-    return coff2yaml(outs(), cast<COFFObjectFile>(Obj));
+    return errorCodeToError(coff2yaml(outs(), cast<COFFObjectFile>(Obj)));
 
   if (Obj.isXCOFF())
-    return xcoff2yaml(outs(), cast<XCOFFObjectFile>(Obj));
+    return errorCodeToError(xcoff2yaml(outs(), cast<XCOFFObjectFile>(Obj)));
 
   if (Obj.isELF())
     return elf2yaml(outs(), Obj);
+
   if (Obj.isWasm())
-    return wasm2yaml(outs(), cast<WasmObjectFile>(Obj));
+    return errorCodeToError(wasm2yaml(outs(), cast<WasmObjectFile>(Obj)));
 
-  return obj2yaml_error::unsupported_obj_file_format;
+  return errorCodeToError(obj2yaml_error::unsupported_obj_file_format);
 }
 
 static Error dumpInput(StringRef File) {
@@ -44,7 +45,7 @@ static Error dumpInput(StringRef File) {
     return errorCodeToError(macho2yaml(outs(), Binary));
   // TODO: If this is an archive, then burst it and dump each entry
   if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary))
-    return errorCodeToError(dumpObject(*Obj));
+    return dumpObject(*Obj);
   if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary))
     return minidump2yaml(outs(), *Minidump);
 
diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h
index b40e2c5c5a640..4f4a5330429ff 100644
--- a/llvm/tools/obj2yaml/obj2yaml.h
+++ b/llvm/tools/obj2yaml/obj2yaml.h
@@ -21,7 +21,7 @@
 
 std::error_code coff2yaml(llvm::raw_ostream &Out,
                           const llvm::object::COFFObjectFile &Obj);
-std::error_code elf2yaml(llvm::raw_ostream &Out,
+llvm::Error elf2yaml(llvm::raw_ostream &Out,
                          const llvm::object::ObjectFile &Obj);
 std::error_code macho2yaml(llvm::raw_ostream &Out,
                            const llvm::object::Binary &Obj);

From 960ff0810da8fe7bb0059acdc4fdc397440eb245 Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Mon, 15 Jul 2019 11:58:10 +0000
Subject: [PATCH 095/451] [OpenCL][PR41727] Prevent ICE on global dtors

Pass NULL as the pointer argument of __cxa_atexit if the address space
of the variable does not match that of the parameter. This does not yet
align with how dtors are generated, which should be changed too.

Differential Revision: https://reviews.llvm.org/D62413

llvm-svn: 366059
---
 clang/lib/CodeGen/CGDeclCXX.cpp       | 20 +++++++++++++++++---
 clang/lib/CodeGen/CodeGenModule.cpp   |  8 ++++++--
 clang/lib/CodeGen/ItaniumCXXABI.cpp   | 21 +++++++++++++--------
 clang/lib/CodeGen/TargetInfo.h        |  5 +++++
 clang/test/CodeGenOpenCLCXX/atexit.cl | 11 +++++++++++
 5 files changed, 52 insertions(+), 13 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCLCXX/atexit.cl

diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index b03e3faeb3389..1c7c6fb5413db 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -14,6 +14,7 @@
 #include "CGCXXABI.h"
 #include "CGObjCRuntime.h"
 #include "CGOpenMPRuntime.h"
+#include "TargetInfo.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Intrinsics.h"
@@ -118,9 +119,22 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
     CXXDestructorDecl *Dtor = Record->getDestructor();
 
     Func = CGM.getAddrAndTypeOfCXXStructor(GlobalDecl(Dtor, Dtor_Complete));
-    Argument = llvm::ConstantExpr::getBitCast(
-        Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo());
-
+    if (CGF.getContext().getLangOpts().OpenCL) {
+      auto DestAS =
+          CGM.getTargetCodeGenInfo().getAddrSpaceOfCxaAtexitPtrParam();
+      auto DestTy = CGF.getTypes().ConvertType(Type)->getPointerTo(
+          CGM.getContext().getTargetAddressSpace(DestAS));
+      auto SrcAS = D.getType().getQualifiers().getAddressSpace();
+      if (DestAS == SrcAS)
+        Argument = llvm::ConstantExpr::getBitCast(Addr.getPointer(), DestTy);
+      else
+        // FIXME: On addr space mismatch we are passing NULL. The generation
+        // of the global destructor function should be adjusted accordingly.
+        Argument = llvm::ConstantPointerNull::get(DestTy);
+    } else {
+      Argument = llvm::ConstantExpr::getBitCast(
+          Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo());
+    }
   // Otherwise, the standard logic requires a helper function.
   } else {
     Func = CodeGenFunction(CGM)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ea52f0c61fce6..ba501d645c2c9 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3577,8 +3577,12 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D,
 llvm::Constant *
 CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty,
                                      StringRef Name) {
-  auto *Ret =
-      GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), nullptr);
+  auto PtrTy =
+      getContext().getLangOpts().OpenCL
+          ? llvm::PointerType::get(
+                Ty, getContext().getTargetAddressSpace(LangAS::opencl_global))
+          : llvm::PointerType::getUnqual(Ty);
+  auto *Ret = GetOrCreateLLVMGlobal(Name, PtrTy, nullptr);
   setDSOLocal(cast<llvm::GlobalValue>(Ret->stripPointerCasts()));
   return Ret;
 }
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index a12f08fbe9a30..cb22239559ad1 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -2284,8 +2284,19 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
   llvm::Type *dtorTy =
     llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, false)->getPointerTo();
 
+  // Preserve address space of addr.
+  auto AddrAS = addr ? addr->getType()->getPointerAddressSpace() : 0;
+  auto AddrInt8PtrTy =
+      AddrAS ? CGF.Int8Ty->getPointerTo(AddrAS) : CGF.Int8PtrTy;
+
+  // Create a variable that binds the atexit to this shared object.
+  llvm::Constant *handle =
+      CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle");
+  auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts());
+  GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
+
   // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
-  llvm::Type *paramTys[] = { dtorTy, CGF.Int8PtrTy, CGF.Int8PtrTy };
+  llvm::Type *paramTys[] = {dtorTy, AddrInt8PtrTy, handle->getType()};
   llvm::FunctionType *atexitTy =
     llvm::FunctionType::get(CGF.IntTy, paramTys, false);
 
@@ -2294,12 +2305,6 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
   if (llvm::Function *fn = dyn_cast<llvm::Function>(atexit.getCallee()))
     fn->setDoesNotThrow();
 
-  // Create a variable that binds the atexit to this shared object.
-  llvm::Constant *handle =
-      CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle");
-  auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts());
-  GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
-
   if (!addr)
     // addr is null when we are trying to register a dtor annotated with
     // __attribute__((destructor)) in a constructor function. Using null here is
@@ -2309,7 +2314,7 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF,
 
   llvm::Value *args[] = {llvm::ConstantExpr::getBitCast(
                              cast<llvm::Constant>(dtor.getCallee()), dtorTy),
-                         llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy),
+                         llvm::ConstantExpr::getBitCast(addr, AddrInt8PtrTy),
                          handle};
   CGF.EmitNounwindRuntimeCall(atexit, args);
 }
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index d7e9eee9c5b3e..e1e90e73cb587 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -267,6 +267,11 @@ class TargetCodeGenInfo {
                                                LangAS SrcAddr, LangAS DestAddr,
                                                llvm::Type *DestTy) const;
 
+  /// Get address space of pointer parameter for __cxa_atexit.
+  virtual LangAS getAddrSpaceOfCxaAtexitPtrParam() const {
+    return LangAS::Default;
+  }
+
   /// Get the syncscope used in LLVM IR.
   virtual llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
                                                  SyncScope Scope,
diff --git a/clang/test/CodeGenOpenCLCXX/atexit.cl b/clang/test/CodeGenOpenCLCXX/atexit.cl
new file mode 100644
index 0000000000000..b4571f96a1a25
--- /dev/null
+++ b/clang/test/CodeGenOpenCLCXX/atexit.cl
@@ -0,0 +1,11 @@
+//RUN: %clang_cc1 %s -triple spir -cl-std=c++ -emit-llvm -O0 -o - | FileCheck %s
+
+struct S {
+  ~S(){};
+};
+S s;
+
+//CHECK-LABEL: define internal void @__cxx_global_var_init()
+//CHECK: call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S addrspace(4)*)* @_ZNU3AS41SD1Ev to void (i8*)*), i8* null, i8 addrspace(1)* @__dso_handle)
+
+//CHECK: declare i32 @__cxa_atexit(void (i8*)*, i8*, i8 addrspace(1)*)

From 7d5100115af5abef4ba53d0a15467695d218d987 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 15 Jul 2019 12:10:02 +0000
Subject: [PATCH 096/451] PDB HashTable: Make iterator key type const

Having the hash table key change during iteration is bad, so make it
impossible. Nothing relied on the key type not being const.

(This is also necessary to be able to call the const version of
iterator_facade_base::operator->(). Nothing calls this, and nothing
will, but I tried using it locally during development and it took me a
while to understand what was going wrong.)

Also rename the iterator typedef to const_iterator.

No behavior change.

Differential Revision: https://reviews.llvm.org/D64641

llvm-svn: 366060
---
 .../llvm/DebugInfo/PDB/Native/HashTable.h        | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
index b00873b575b20..e045cc28f71ae 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -37,7 +37,7 @@ template <typename ValueT>
 class HashTableIterator
     : public iterator_facade_base<HashTableIterator<ValueT>,
                                   std::forward_iterator_tag,
-                                  std::pair<uint32_t, ValueT>> {
+                                  const std::pair<uint32_t, ValueT>> {
   friend HashTable<ValueT>;
 
   HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
@@ -94,8 +94,8 @@ class HashTableIterator
 
 template <typename ValueT>
 class HashTable {
-  using iterator = HashTableIterator<ValueT>;
-  friend iterator;
+  using const_iterator = HashTableIterator<ValueT>;
+  friend const_iterator;
 
   struct Header {
     support::ulittle32_t Size;
@@ -206,20 +206,20 @@ class HashTable {
   uint32_t capacity() const { return Buckets.size(); }
   uint32_t size() const { return Present.count(); }
 
-  iterator begin() const { return iterator(*this); }
-  iterator end() const { return iterator(*this, 0, true); }
+  const_iterator begin() const { return const_iterator(*this); }
+  const_iterator end() const { return const_iterator(*this, 0, true); }
 
   /// Find the entry whose key has the specified hash value, using the specified
   /// traits defining hash function and equality.
   template <typename Key, typename TraitsT>
-  iterator find_as(const Key &K, TraitsT &Traits) const {
+  const_iterator find_as(const Key &K, TraitsT &Traits) const {
     uint32_t H = Traits.hashLookupKey(K) % capacity();
     uint32_t I = H;
     Optional<uint32_t> FirstUnused;
     do {
       if (isPresent(I)) {
         if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
-          return iterator(*this, I, false);
+          return const_iterator(*this, I, false);
       } else {
         if (!FirstUnused)
           FirstUnused = I;
@@ -238,7 +238,7 @@ class HashTable {
     // table were Present.  But this would violate the load factor constraints
     // that we impose, so it should never happen.
     assert(FirstUnused);
-    return iterator(*this, *FirstUnused, true);
+    return const_iterator(*this, *FirstUnused, true);
   }
 
   /// Set the entry using a key type that the specified Traits can convert

From 14d115ff98ac2324bf449f5021d172560b5e8150 Mon Sep 17 00:00:00 2001
From: Balazs Keri <1.int32@gmail.com>
Date: Mon, 15 Jul 2019 12:16:30 +0000
Subject: [PATCH 097/451] [ASTImporter] Using Lang_CXX14 in
 ASTImporterVisibilityTest.

Summary:
These tests may work with C++14 language constructs in the future
(variable templates and others).
To avoid warnings about the language version, the C++ version constants in
the tests are updated.

Reviewers: martong, a.sidorin

Reviewed By: martong

Subscribers: rnkovacs, dkrupp, Szelethus, gamesh411, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64477

llvm-svn: 366061
---
 .../AST/ASTImporterVisibilityTest.cpp         | 40 +++++++++++--------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/clang/unittests/AST/ASTImporterVisibilityTest.cpp b/clang/unittests/AST/ASTImporterVisibilityTest.cpp
index 66c967e8460b9..88543257b75a4 100644
--- a/clang/unittests/AST/ASTImporterVisibilityTest.cpp
+++ b/clang/unittests/AST/ASTImporterVisibilityTest.cpp
@@ -80,13 +80,13 @@ class ImportVisibilityChain
     std::string Code = getCode() + getCode();
     auto Pattern = getPattern();
 
-    TranslationUnitDecl *FromTu = getTuDecl(Code, Lang_CXX, "input0.cc");
+    TranslationUnitDecl *FromTu = getTuDecl(Code, Lang_CXX14, "input0.cc");
 
     auto *FromD0 = FirstDeclMatcher<DeclTy>().match(FromTu, Pattern);
     auto *FromD1 = LastDeclMatcher<DeclTy>().match(FromTu, Pattern);
 
-    auto *ToD0 = Import(FromD0, Lang_CXX);
-    auto *ToD1 = Import(FromD1, Lang_CXX);
+    auto *ToD0 = Import(FromD0, Lang_CXX14);
+    auto *ToD1 = Import(FromD1, Lang_CXX14);
 
     EXPECT_TRUE(ToD0);
     ASSERT_TRUE(ToD1);
@@ -157,13 +157,14 @@ class ImportVisibility
   BindableMatcher<Decl> getPattern() const { return PatternFactory()(); }
 
   void TypedTest_ImportAfter() {
-    TranslationUnitDecl *ToTu = getToTuDecl(getCode0(), Lang_CXX);
-    TranslationUnitDecl *FromTu = getTuDecl(getCode1(), Lang_CXX, "input1.cc");
+    TranslationUnitDecl *ToTu = getToTuDecl(getCode0(), Lang_CXX14);
+    TranslationUnitDecl *FromTu =
+        getTuDecl(getCode1(), Lang_CXX14, "input1.cc");
 
     auto *ToD0 = FirstDeclMatcher<DeclTy>().match(ToTu, getPattern());
     auto *FromD1 = FirstDeclMatcher<DeclTy>().match(FromTu, getPattern());
 
-    auto *ToD1 = Import(FromD1, Lang_CXX);
+    auto *ToD1 = Import(FromD1, Lang_CXX14);
 
     ASSERT_TRUE(ToD0);
     ASSERT_TRUE(ToD1);
@@ -176,12 +177,14 @@ class ImportVisibility
   }
 
   void TypedTest_ImportAfterImport() {
-    TranslationUnitDecl *FromTu0 = getTuDecl(getCode0(), Lang_CXX, "input0.cc");
-    TranslationUnitDecl *FromTu1 = getTuDecl(getCode1(), Lang_CXX, "input1.cc");
+    TranslationUnitDecl *FromTu0 =
+        getTuDecl(getCode0(), Lang_CXX14, "input0.cc");
+    TranslationUnitDecl *FromTu1 =
+        getTuDecl(getCode1(), Lang_CXX14, "input1.cc");
     auto *FromD0 = FirstDeclMatcher<DeclTy>().match(FromTu0, getPattern());
     auto *FromD1 = FirstDeclMatcher<DeclTy>().match(FromTu1, getPattern());
-    auto *ToD0 = Import(FromD0, Lang_CXX);
-    auto *ToD1 = Import(FromD1, Lang_CXX);
+    auto *ToD0 = Import(FromD0, Lang_CXX14);
+    auto *ToD1 = Import(FromD1, Lang_CXX14);
     ASSERT_TRUE(ToD0);
     ASSERT_TRUE(ToD1);
     EXPECT_NE(ToD0, ToD1);
@@ -192,13 +195,14 @@ class ImportVisibility
   }
 
   void TypedTest_ImportAfterWithMerge() {
-    TranslationUnitDecl *ToTu = getToTuDecl(getCode0(), Lang_CXX);
-    TranslationUnitDecl *FromTu = getTuDecl(getCode1(), Lang_CXX, "input1.cc");
+    TranslationUnitDecl *ToTu = getToTuDecl(getCode0(), Lang_CXX14);
+    TranslationUnitDecl *FromTu =
+        getTuDecl(getCode1(), Lang_CXX14, "input1.cc");
 
     auto *ToF0 = FirstDeclMatcher<DeclTy>().match(ToTu, getPattern());
     auto *FromF1 = FirstDeclMatcher<DeclTy>().match(FromTu, getPattern());
 
-    auto *ToF1 = Import(FromF1, Lang_CXX);
+    auto *ToF1 = Import(FromF1, Lang_CXX14);
 
     ASSERT_TRUE(ToF0);
     ASSERT_TRUE(ToF1);
@@ -213,12 +217,14 @@ class ImportVisibility
   }
 
   void TypedTest_ImportAfterImportWithMerge() {
-    TranslationUnitDecl *FromTu0 = getTuDecl(getCode0(), Lang_CXX, "input0.cc");
-    TranslationUnitDecl *FromTu1 = getTuDecl(getCode1(), Lang_CXX, "input1.cc");
+    TranslationUnitDecl *FromTu0 =
+        getTuDecl(getCode0(), Lang_CXX14, "input0.cc");
+    TranslationUnitDecl *FromTu1 =
+        getTuDecl(getCode1(), Lang_CXX14, "input1.cc");
     auto *FromF0 = FirstDeclMatcher<DeclTy>().match(FromTu0, getPattern());
     auto *FromF1 = FirstDeclMatcher<DeclTy>().match(FromTu1, getPattern());
-    auto *ToF0 = Import(FromF0, Lang_CXX);
-    auto *ToF1 = Import(FromF1, Lang_CXX);
+    auto *ToF0 = Import(FromF0, Lang_CXX14);
+    auto *ToF1 = Import(FromF1, Lang_CXX14);
     ASSERT_TRUE(ToF0);
     ASSERT_TRUE(ToF1);
     if (shouldBeLinked())

From 130df2c7e8fddb7aefccfa1497d5d75062f20aae Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 15 Jul 2019 13:00:43 +0000
Subject: [PATCH 098/451] Fix uninitialized variable analyzer warning. NFCI.

llvm-svn: 366062
---
 clang/lib/AST/ASTImporter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 863a1cb0af8c1..8a59c3a76990d 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -8075,7 +8075,7 @@ ASTImporter::Import(NestedNameSpecifier *FromNNS) {
   if (!FromNNS)
     return nullptr;
 
-  NestedNameSpecifier *Prefix;
+  NestedNameSpecifier *Prefix = nullptr;
   if (Error Err = importInto(Prefix, FromNNS->getPrefix()))
     return std::move(Err);
 

From d6f34bf4d4a3dfdbd9ae62865cbd30448dcc272d Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Mon, 15 Jul 2019 13:02:21 +0000
Subject: [PATCH 099/451] [OpenCL] Deduce addr space for pointee of dependent
 types in instantiation.

Since the pointee doesn't require context-sensitive address space deduction,
it's easier to handle the pointee of dependent types during template
instantiation.

Differential Revision: https://reviews.llvm.org/D64400

llvm-svn: 366063
---
 clang/lib/Sema/TreeTransform.h                | 17 ++++++++
 .../SemaOpenCLCXX/address-space-deduction.cl  | 39 +++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index a6122c933848b..3b841ec649a8f 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -4536,6 +4536,14 @@ QualType TreeTransform<Derived>::TransformDecayedType(TypeLocBuilder &TLB,
   return Result;
 }
 
+/// Helper to deduce addr space of a pointee type in OpenCL mode.
+/// If the type is updated it will be overwritten in PointeeType param.
+static void deduceOpenCLPointeeAddrSpace(Sema &SemaRef, QualType &PointeeType) {
+  if (PointeeType.getAddressSpace() == LangAS::Default)
+    PointeeType = SemaRef.Context.getAddrSpaceQualType(PointeeType,
+                                                       LangAS::opencl_generic);
+}
+
 template<typename Derived>
 QualType TreeTransform<Derived>::TransformPointerType(TypeLocBuilder &TLB,
                                                       PointerTypeLoc TL) {
@@ -4544,6 +4552,9 @@ QualType TreeTransform<Derived>::TransformPointerType(TypeLocBuilder &TLB,
   if (PointeeType.isNull())
     return QualType();
 
+  if (SemaRef.getLangOpts().OpenCL)
+    deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType);
+
   QualType Result = TL.getType();
   if (PointeeType->getAs<ObjCObjectType>()) {
     // A dependent pointer type 'T *' has is being transformed such
@@ -4582,6 +4593,9 @@ TreeTransform<Derived>::TransformBlockPointerType(TypeLocBuilder &TLB,
   if (PointeeType.isNull())
     return QualType();
 
+  if (SemaRef.getLangOpts().OpenCL)
+    deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType);
+
   QualType Result = TL.getType();
   if (getDerived().AlwaysRebuild() ||
       PointeeType != TL.getPointeeLoc().getType()) {
@@ -4611,6 +4625,9 @@ TreeTransform<Derived>::TransformReferenceType(TypeLocBuilder &TLB,
   if (PointeeType.isNull())
     return QualType();
 
+  if (SemaRef.getLangOpts().OpenCL)
+    deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType);
+
   QualType Result = TL.getType();
   if (getDerived().AlwaysRebuild() ||
       PointeeType != T->getPointeeTypeAsWritten()) {
diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
index 6b2a07cad748b..f66d224e25414 100644
--- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl
+++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
@@ -24,3 +24,42 @@ struct c2 {
   alias_c1_ptr ptr = &y;
 };
 
+
+// Addr spaces for pointee of dependent types are not deduced
+// during parsing but during template instantiation instead.
+
+template <class T>
+struct x1 {
+//CHECK: -CXXMethodDecl {{.*}} operator= 'x1<T> &(const x1<T> &) __generic'
+//CHECK: -CXXMethodDecl {{.*}} operator= '__generic x1<int> &(const __generic x1<int> &) __generic'
+  x1<T>& operator=(const x1<T>& xx) {
+    y = xx.y;
+    return *this;
+  }
+  int y;
+};
+
+template <class T>
+struct x2 {
+//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1<T> *) __generic'
+//CHECK: -CXXMethodDecl {{.*}} foo 'void (__generic x1<int> *) __generic'
+  void foo(x1<T>* xx) {
+    m[0] = *xx;
+  }
+//CHECK: -FieldDecl {{.*}}  m 'x1<int> [2]'
+  x1<T> m[2];
+};
+
+void bar(__global x1<int> *xx, __global x2<int> *bar) {
+  bar->foo(xx);
+}
+
+template <typename T>
+class x3 : public T {
+public:
+  //CHECK: -CXXConstructorDecl {{.*}} x3<T> 'void (const x3<T> &) __generic'
+  x3(const x3 &t);
+};
+//CHECK: -CXXConstructorDecl {{.*}} x3<T> 'void (const x3<T> &) __generic'
+template <typename T>
+x3<T>::x3(const x3<T> &t) {}

From 83ae0b5eb4f84721eadaa428139d140f915353b7 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Mon, 15 Jul 2019 13:12:36 +0000
Subject: [PATCH 100/451] [mips] Remove "else-after-return". NFC

llvm-svn: 366064
---
 llvm/lib/Target/Mips/MipsISelLowering.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 89bfd9af33b7e..27bf18c8ba90d 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -655,7 +655,7 @@ class TargetRegisterClass;
     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
       if (ConstraintCode == "R")
         return InlineAsm::Constraint_R;
-      else if (ConstraintCode == "ZC")
+      if (ConstraintCode == "ZC")
         return InlineAsm::Constraint_ZC;
       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
     }

From 12400b97838d78386920cbda3c9dadb9f8727ce3 Mon Sep 17 00:00:00 2001
From: David Zarzycki <dave@znu.io>
Date: Mon, 15 Jul 2019 14:12:35 +0000
Subject: [PATCH 101/451] [Testing] Add missing "REQUIRES: asserts"

This broke after r366048 / https://reviews.llvm.org/D63923

llvm-svn: 366065
---
 llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
index b669904297940..dc023f36dd219 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt < %s -S -debug-only=loop-unroll -loop-unroll -unroll-runtime -unroll-peel-multi-deopt-exit 2>&1 | FileCheck %s
 ; RUN: opt < %s -S -debug-only=loop-unroll -unroll-peel-multi-deopt-exit -passes='require<profile-summary>,function(require<opt-remark-emit>,unroll)' 2>&1 | FileCheck %s
 

From 60fb5e97a0df790eea5c05b82fb1ee87d48e4d76 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 15 Jul 2019 14:29:14 +0000
Subject: [PATCH 102/451] [X86] isTargetShuffleEquivalent - assert the expected
 mask is correctly formed. NFCI.

While we don't make any assumptions about the actual mask, assert that the expected mask only contains valid mask element values.

llvm-svn: 366066
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8be441fe28e08..c5bf3dcac45df 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10001,6 +10001,8 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
   int Size = Mask.size();
   if (Size != (int)ExpectedMask.size())
     return false;
+  assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) &&
+         "Illegal target shuffle mask");
 
   for (int i = 0; i < Size; ++i)
     if (Mask[i] == SM_SentinelUndef)

From 8d879c8d95496c0a9049ff80261fbc5754ce56b3 Mon Sep 17 00:00:00 2001
From: Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>
Date: Mon, 15 Jul 2019 14:37:57 +0000
Subject: [PATCH 103/451] [AMDGPU][MC] Corrected encoding of src0 for DS_GWS_*
 instructions

See bug 42599: https://bugs.llvm.org/show_bug.cgi?id=42599

Reviewers: artem.tamazov, arsenm

Differential Revision: https://reviews.llvm.org/D64716

llvm-svn: 366067
---
 llvm/lib/Target/AMDGPU/DSInstructions.td      |  8 ++--
 llvm/test/MC/AMDGPU/ds.s                      | 16 +++----
 llvm/test/MC/AMDGPU/expressions.s             |  2 +-
 llvm/test/MC/AMDGPU/gfx10_asm_all.s           | 24 +++++-----
 llvm/test/MC/AMDGPU/gfx7_asm_all.s            | 30 ++++++------
 llvm/test/MC/AMDGPU/gfx8_asm_all.s            | 30 ++++++------
 llvm/test/MC/AMDGPU/gfx9_asm_all.s            | 30 ++++++------
 llvm/test/MC/Disassembler/AMDGPU/ds_vi.txt    | 16 +++----
 .../MC/Disassembler/AMDGPU/gfx10_dasm_all.txt | 36 +++++++-------
 .../MC/Disassembler/AMDGPU/gfx8_dasm_all.txt  | 48 +++++++++----------
 .../MC/Disassembler/AMDGPU/gfx9_dasm_all.txt  | 48 +++++++++----------
 11 files changed, 145 insertions(+), 143 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 93308e6d78a59..e39f565fd225d 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -38,6 +38,8 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
   bits<1> has_data0 = 1;
   bits<1> has_data1 = 1;
 
+  bits<1> has_gws_data0 = 0; // data0 is encoded as addr
+
   bits<1> has_offset  = 1; // has "offset" that should be split to offset0,1
   bits<1> has_offset0 = 1;
   bits<1> has_offset1 = 1;
@@ -320,7 +322,7 @@ class DS_GWS_1D <string opName>
 : DS_GWS<opName,
   (ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> {
 
-  let has_data0 = 1;
+  let has_gws_data0 = 1;
 }
 
 class DS_VOID <string opName> : DS_Pseudo<opName,
@@ -832,7 +834,7 @@ class Base_DS_Real_gfx6_gfx7_gfx10<bits<8> op, DS_Pseudo ps, int ef> :
   let Inst{17}    = !if(ps.has_gds, gds, ps.gdsValue);
   let Inst{25-18} = op;
   let Inst{31-26} = 0x36;
-  let Inst{39-32} = !if(ps.has_addr, addr, 0);
+  let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0, 0));
   let Inst{47-40} = !if(ps.has_data0, data0, 0);
   let Inst{55-48} = !if(ps.has_data1, data1, 0);
   let Inst{63-56} = !if(ps.has_vdst, vdst, 0);
@@ -1051,7 +1053,7 @@ class DS_Real_vi <bits<8> op, DS_Pseudo ds> :
   let Inst{16}    = !if(ds.has_gds, gds, ds.gdsValue);
   let Inst{24-17} = op;
   let Inst{31-26} = 0x36; // ds prefix
-  let Inst{39-32} = !if(ds.has_addr, addr, 0);
+  let Inst{39-32} = !if(ds.has_addr, addr, !if(ds.has_gws_data0, data0, 0));
   let Inst{47-40} = !if(ds.has_data0, data0, 0);
   let Inst{55-48} = !if(ds.has_data1, data1, 0);
   let Inst{63-56} = !if(ds.has_vdst, vdst, 0);
diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s
index 1bb0597209ea5..70f52972a81c8 100644
--- a/llvm/test/MC/AMDGPU/ds.s
+++ b/llvm/test/MC/AMDGPU/ds.s
@@ -148,12 +148,12 @@ ds_max_f32 v2, v4
 // VI:   ds_max_f32 v2, v4 ; encoding: [0x00,0x00,0x26,0xd8,0x02,0x04,0x00,0x00]
 
 ds_gws_init v2 gds
-// SICI: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x66,0xd8,0x00,0x02,0x00,0x00]
-// VI:   ds_gws_init v2 gds ; encoding: [0x00,0x00,0x33,0xd9,0x00,0x02,0x00,0x00]
+// SICI: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x66,0xd8,0x02,0x00,0x00,0x00]
+// VI: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x33,0xd9,0x02,0x00,0x00,0x00]
 
 ds_gws_init v3 offset:12345 gds
-// SICI: ds_gws_init v3 offset:12345 gds ; encoding: [0x39,0x30,0x66,0xd8,0x00,0x03,0x00,0x00]
-// VI:   ds_gws_init v3 offset:12345 gds ; encoding: [0x39,0x30,0x33,0xd9,0x00,0x03,0x00,0x00]
+// SICI: ds_gws_init v3 offset:12345 gds ; encoding: [0x39,0x30,0x66,0xd8,0x03,0x00,0x00,0x00]
+// VI: ds_gws_init v3 offset:12345 gds ; encoding: [0x39,0x30,0x33,0xd9,0x03,0x00,0x00,0x00]
 
 ds_gws_sema_v gds
 // SICI: ds_gws_sema_v gds ; encoding: [0x00,0x00,0x6a,0xd8,0x00,0x00,0x00,0x00]
@@ -164,16 +164,16 @@ ds_gws_sema_v offset:257 gds
 // VI:   ds_gws_sema_v offset:257 gds    ; encoding: [0x01,0x01,0x35,0xd9,0x00,0x00,0x00,0x00]
 
 ds_gws_sema_br v2 gds
-// SICI: ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x6e,0xd8,0x00,0x02,0x00,0x00]
-// VI:   ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x37,0xd9,0x00,0x02,0x00,0x00]
+// SICI: ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x6e,0xd8,0x02,0x00,0x00,0x00]
+// VI: ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x37,0xd9,0x02,0x00,0x00,0x00]
 
 ds_gws_sema_p gds
 // SICI: ds_gws_sema_p gds ; encoding: [0x00,0x00,0x72,0xd8,0x00,0x00,0x00,0x00]
 // VI:   ds_gws_sema_p gds ; encoding: [0x00,0x00,0x39,0xd9,0x00,0x00,0x00,0x00]
 
 ds_gws_barrier v2 gds
-// SICI: ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x76,0xd8,0x00,0x02,0x00,0x00]
-// VI:   ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x3b,0xd9,0x00,0x02,0x00,0x00]
+// SICI: ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x76,0xd8,0x02,0x00,0x00,0x00]
+// VI: ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x3b,0xd9,0x02,0x00,0x00,0x00]
 
 ds_write_b8 v2, v4
 // SICI: ds_write_b8 v2, v4 ; encoding: [0x00,0x00,0x78,0xd8,0x02,0x04,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/expressions.s b/llvm/test/MC/AMDGPU/expressions.s
index a6037a226b66e..37fe08a52d1ba 100644
--- a/llvm/test/MC/AMDGPU/expressions.s
+++ b/llvm/test/MC/AMDGPU/expressions.s
@@ -233,7 +233,7 @@ s_mov_b32 s0, global
 
 // Use a token with the same name as a global
 ds_gws_init v2 gds
-// VI: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x33,0xd9,0x00,0x02,0x00,0x00]
+// VI: ds_gws_init v2 gds ; encoding: [0x00,0x00,0x33,0xd9,0x02,0x00,0x00,0x00]
 
 // Use a global with the same name as a token
 s_mov_b32 s0, gds
diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_all.s b/llvm/test/MC/AMDGPU/gfx10_asm_all.s
index 9f9407c0b8f82..4916702dbcb12 100644
--- a/llvm/test/MC/AMDGPU/gfx10_asm_all.s
+++ b/llvm/test/MC/AMDGPU/gfx10_asm_all.s
@@ -2462,25 +2462,25 @@ ds_gws_init v0 gds
 // GFX10: encoding: [0x00,0x00,0x66,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_init v255 gds
-// GFX10: encoding: [0x00,0x00,0x66,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x00,0x00,0x66,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_init v0 offset:0 gds
 // GFX10: encoding: [0x00,0x00,0x66,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_init v255 offset:0 gds
-// GFX10: encoding: [0x00,0x00,0x66,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x00,0x00,0x66,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_init v0 offset:4660 gds
 // GFX10: encoding: [0x34,0x12,0x66,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_init v255 offset:4660 gds
-// GFX10: encoding: [0x34,0x12,0x66,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x34,0x12,0x66,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_init v0 offset:65535 gds
 // GFX10: encoding: [0xff,0xff,0x66,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_init v255 offset:65535 gds
-// GFX10: encoding: [0xff,0xff,0x66,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0xff,0xff,0x66,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_sema_v gds
 // GFX10: encoding: [0x00,0x00,0x6a,0xd8,0x00,0x00,0x00,0x00]
@@ -2498,25 +2498,25 @@ ds_gws_sema_br v0 gds
 // GFX10: encoding: [0x00,0x00,0x6e,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_sema_br v255 gds
-// GFX10: encoding: [0x00,0x00,0x6e,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x00,0x00,0x6e,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_sema_br v0 offset:0 gds
 // GFX10: encoding: [0x00,0x00,0x6e,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_sema_br v255 offset:0 gds
-// GFX10: encoding: [0x00,0x00,0x6e,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x00,0x00,0x6e,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_sema_br v0 offset:4660 gds
 // GFX10: encoding: [0x34,0x12,0x6e,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_sema_br v255 offset:4660 gds
-// GFX10: encoding: [0x34,0x12,0x6e,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x34,0x12,0x6e,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_sema_br v0 offset:65535 gds
 // GFX10: encoding: [0xff,0xff,0x6e,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_sema_br v255 offset:65535 gds
-// GFX10: encoding: [0xff,0xff,0x6e,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0xff,0xff,0x6e,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_sema_p gds
 // GFX10: encoding: [0x00,0x00,0x72,0xd8,0x00,0x00,0x00,0x00]
@@ -2534,25 +2534,25 @@ ds_gws_barrier v0 gds
 // GFX10: encoding: [0x00,0x00,0x76,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_barrier v255 gds
-// GFX10: encoding: [0x00,0x00,0x76,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x00,0x00,0x76,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_barrier v0 offset:0 gds
 // GFX10: encoding: [0x00,0x00,0x76,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_barrier v255 offset:0 gds
-// GFX10: encoding: [0x00,0x00,0x76,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x00,0x00,0x76,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_barrier v0 offset:4660 gds
 // GFX10: encoding: [0x34,0x12,0x76,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_barrier v255 offset:4660 gds
-// GFX10: encoding: [0x34,0x12,0x76,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0x34,0x12,0x76,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_barrier v0 offset:65535 gds
 // GFX10: encoding: [0xff,0xff,0x76,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_barrier v255 offset:65535 gds
-// GFX10: encoding: [0xff,0xff,0x76,0xd8,0x00,0xff,0x00,0x00]
+// GFX10: encoding: [0xff,0xff,0x76,0xd8,0xff,0x00,0x00,0x00]
 
 ds_write_b8 v0, v1
 // GFX10: encoding: [0x00,0x00,0x78,0xd8,0x00,0x01,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx7_asm_all.s b/llvm/test/MC/AMDGPU/gfx7_asm_all.s
index 80d6e2f721971..434c64b72ff11 100644
--- a/llvm/test/MC/AMDGPU/gfx7_asm_all.s
+++ b/llvm/test/MC/AMDGPU/gfx7_asm_all.s
@@ -469,19 +469,19 @@ ds_gws_sema_release_all offset:4 gds
 // CHECK: [0x04,0x00,0x62,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x66,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x66,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_init v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x66,0xd8,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x66,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_init v1 gds
-// CHECK: [0x00,0x00,0x66,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x66,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:0 gds
-// CHECK: [0x00,0x00,0x66,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x66,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:4 gds
-// CHECK: [0x04,0x00,0x66,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x66,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_v offset:65535 gds
 // CHECK: [0xff,0xff,0x6a,0xd8,0x00,0x00,0x00,0x00]
@@ -496,19 +496,19 @@ ds_gws_sema_v offset:4 gds
 // CHECK: [0x04,0x00,0x6a,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x6e,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x6e,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x6e,0xd8,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x6e,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 gds
-// CHECK: [0x00,0x00,0x6e,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x6e,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:0 gds
-// CHECK: [0x00,0x00,0x6e,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x6e,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:4 gds
-// CHECK: [0x04,0x00,0x6e,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x6e,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_p offset:65535 gds
 // CHECK: [0xff,0xff,0x72,0xd8,0x00,0x00,0x00,0x00]
@@ -523,19 +523,19 @@ ds_gws_sema_p offset:4 gds
 // CHECK: [0x04,0x00,0x72,0xd8,0x00,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x76,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x76,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x76,0xd8,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x76,0xd8,0xff,0x00,0x00,0x00]
 
 ds_gws_barrier v1 gds
-// CHECK: [0x00,0x00,0x76,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x76,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:0 gds
-// CHECK: [0x00,0x00,0x76,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x76,0xd8,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:4 gds
-// CHECK: [0x04,0x00,0x76,0xd8,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x76,0xd8,0x01,0x00,0x00,0x00]
 
 ds_write_b8 v1, v2 offset:65535
 // CHECK: [0xff,0xff,0x78,0xd8,0x01,0x02,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx8_asm_all.s b/llvm/test/MC/AMDGPU/gfx8_asm_all.s
index 9ae178a3a79c7..1f2de29cccb39 100644
--- a/llvm/test/MC/AMDGPU/gfx8_asm_all.s
+++ b/llvm/test/MC/AMDGPU/gfx8_asm_all.s
@@ -2665,19 +2665,19 @@ ds_gws_sema_release_all offset:4 gds
 // CHECK: [0x04,0x00,0x31,0xd9,0x00,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x33,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x33,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_init v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x33,0xd9,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x33,0xd9,0xff,0x00,0x00,0x00]
 
 ds_gws_init v1 gds
-// CHECK: [0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:0 gds
-// CHECK: [0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:4 gds
-// CHECK: [0x04,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_v offset:65535 gds
 // CHECK: [0xff,0xff,0x35,0xd9,0x00,0x00,0x00,0x00]
@@ -2692,19 +2692,19 @@ ds_gws_sema_v offset:4 gds
 // CHECK: [0x04,0x00,0x35,0xd9,0x00,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x37,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x37,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x37,0xd9,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x37,0xd9,0xff,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 gds
-// CHECK: [0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:0 gds
-// CHECK: [0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:4 gds
-// CHECK: [0x04,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_p offset:65535 gds
 // CHECK: [0xff,0xff,0x39,0xd9,0x00,0x00,0x00,0x00]
@@ -2719,19 +2719,19 @@ ds_gws_sema_p offset:4 gds
 // CHECK: [0x04,0x00,0x39,0xd9,0x00,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x3b,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x3b,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x3b,0xd9,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x3b,0xd9,0xff,0x00,0x00,0x00]
 
 ds_gws_barrier v1 gds
-// CHECK: [0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:0 gds
-// CHECK: [0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:4 gds
-// CHECK: [0x04,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
 
 ds_consume v5 offset:65535
 // CHECK: [0xff,0xff,0x7a,0xd9,0x00,0x00,0x00,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_all.s b/llvm/test/MC/AMDGPU/gfx9_asm_all.s
index 104e8ea3f036d..fc4d8f438cc33 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_all.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_all.s
@@ -2834,19 +2834,19 @@ ds_gws_sema_release_all offset:4 gds
 // CHECK: [0x04,0x00,0x31,0xd9,0x00,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x33,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x33,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_init v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x33,0xd9,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x33,0xd9,0xff,0x00,0x00,0x00]
 
 ds_gws_init v1 gds
-// CHECK: [0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:0 gds
-// CHECK: [0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_init v1 offset:4 gds
-// CHECK: [0x04,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_v offset:65535 gds
 // CHECK: [0xff,0xff,0x35,0xd9,0x00,0x00,0x00,0x00]
@@ -2861,19 +2861,19 @@ ds_gws_sema_v offset:4 gds
 // CHECK: [0x04,0x00,0x35,0xd9,0x00,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x37,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x37,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x37,0xd9,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x37,0xd9,0xff,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 gds
-// CHECK: [0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:0 gds
-// CHECK: [0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_br v1 offset:4 gds
-// CHECK: [0x04,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_sema_p offset:65535 gds
 // CHECK: [0xff,0xff,0x39,0xd9,0x00,0x00,0x00,0x00]
@@ -2888,19 +2888,19 @@ ds_gws_sema_p offset:4 gds
 // CHECK: [0x04,0x00,0x39,0xd9,0x00,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:65535 gds
-// CHECK: [0xff,0xff,0x3b,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0xff,0xff,0x3b,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v255 offset:65535 gds
-// CHECK: [0xff,0xff,0x3b,0xd9,0x00,0xff,0x00,0x00]
+// CHECK: [0xff,0xff,0x3b,0xd9,0xff,0x00,0x00,0x00]
 
 ds_gws_barrier v1 gds
-// CHECK: [0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:0 gds
-// CHECK: [0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x00,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
 
 ds_gws_barrier v1 offset:4 gds
-// CHECK: [0x04,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
+// CHECK: [0x04,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
 
 ds_consume v5 offset:65535
 // CHECK: [0xff,0xff,0x7a,0xd9,0x00,0x00,0x00,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/ds_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/ds_vi.txt
index 7f0cb4a0088f5..094465776e191 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/ds_vi.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/ds_vi.txt
@@ -81,11 +81,11 @@
 # VI:   ds_max_f32 v2, v4 ; encoding: [0x00,0x00,0x26,0xd8,0x02,0x04,0x00,0x00]
 0x00 0x00 0x26 0xd8 0x02 0x04 0x00 0x00
 
-# VI:   ds_gws_init v2 gds ; encoding: [0x00,0x00,0x33,0xd9,0x00,0x02,0x00,0x00]
-0x00 0x00 0x33 0xd9 0x00 0x02 0x00,0x00
+# VI:   ds_gws_init v2 gds ; encoding: [0x00,0x00,0x33,0xd9,0x02,0x00,0x00,0x00]
+0x00 0x00 0x33 0xd9 0x02 0x00 0x00,0x00
 
-# VI:   ds_gws_init v3 offset:12345 gds ; encoding: [0x39,0x30,0x33,0xd9,0x00,0x03,0x00,0x00]
-0x39 0x30 0x33 0xd9 0x00 0x03 0x00 0x00
+# VI:   ds_gws_init v3 offset:12345 gds ; encoding: [0x39,0x30,0x33,0xd9,0x03,0x00,0x00,0x00]
+0x39 0x30 0x33 0xd9 0x03 0x00 0x00 0x00
 
 # VI:   ds_gws_sema_v gds ; encoding: [0x00,0x00,0x35,0xd9,0x00,0x00,0x00,0x00]
 0x00 0x00 0x35 0xd9 0x00 0x00 0x00 0x00
@@ -93,14 +93,14 @@
 # VI:   ds_gws_sema_v offset:257 gds    ; encoding: [0x01,0x01,0x35,0xd9,0x00,0x00,0x00,0x00]
 0x01 0x01 0x35 0xd9 0x00 0x00 0x00 0x00
 
-# VI:   ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x37,0xd9,0x00,0x02,0x00,0x00]
-0x00 0x00 0x37 0xd9 0x00 0x02 0x00 0x00
+# VI:   ds_gws_sema_br v2 gds ; encoding: [0x00,0x00,0x37,0xd9,0x02,0x00,0x00,0x00]
+0x00 0x00 0x37 0xd9 0x02 0x00 0x00 0x00
 
 # VI:   ds_gws_sema_p gds ; encoding: [0x00,0x00,0x39,0xd9,0x00,0x00,0x00,0x00]
 0x00 0x00 0x39 0xd9 0x00 0x00 0x00 0x00
 
-# VI:   ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x3b,0xd9,0x00,0x02,0x00,0x00]
-0x00 0x00 0x3b 0xd9 0x00 0x02 0x00 0x00
+# VI:   ds_gws_barrier v2 gds ; encoding: [0x00,0x00,0x3b,0xd9,0x02,0x00,0x00,0x00]
+0x00 0x00 0x3b 0xd9 0x02 0x00 0x00 0x00
 
 # VI:   ds_write_b8 v2, v4 ; encoding: [0x00,0x00,0x3c,0xd8,0x02,0x04,0x00,0x00]
 0x00 0x00 0x3c 0xd8 0x02 0x04 0x00 0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
index 464dc0cd14bbb..e564aff595879 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
@@ -3578,14 +3578,14 @@
 # GFX10: ds_gws_barrier v0 offset:65535 gds ; encoding: [0xff,0xff,0x76,0xd8,0x00,0x00,0x00,0x00]
 0xff,0xff,0x76,0xd8,0x00,0x00,0x00,0x00
 
-# GFX10: ds_gws_barrier v255 gds         ; encoding: [0x00,0x00,0x76,0xd8,0x00,0xff,0x00,0x00]
-0x00,0x00,0x76,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_barrier v255 gds         ; encoding: [0x00,0x00,0x76,0xd8,0xff,0x00,0x00,0x00]
+0x00,0x00,0x76,0xd8,0xff,0x00,0x00,0x00
 
-# GFX10: ds_gws_barrier v255 offset:4660 gds ; encoding: [0x34,0x12,0x76,0xd8,0x00,0xff,0x00,0x00]
-0x34,0x12,0x76,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_barrier v255 offset:4660 gds ; encoding: [0x34,0x12,0x76,0xd8,0xff,0x00,0x00,0x00]
+0x34,0x12,0x76,0xd8,0xff,0x00,0x00,0x00
 
-# GFX10: ds_gws_barrier v255 offset:65535 gds ; encoding: [0xff,0xff,0x76,0xd8,0x00,0xff,0x00,0x00]
-0xff,0xff,0x76,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_barrier v255 offset:65535 gds ; encoding: [0xff,0xff,0x76,0xd8,0xff,0x00,0x00,0x00]
+0xff,0xff,0x76,0xd8,0xff,0x00,0x00,0x00
 
 # GFX10: ds_gws_init v0 gds              ; encoding: [0x00,0x00,0x66,0xd8,0x00,0x00,0x00,0x00]
 0x00,0x00,0x66,0xd8,0x00,0x00,0x00,0x00
@@ -3596,14 +3596,14 @@
 # GFX10: ds_gws_init v0 offset:65535 gds ; encoding: [0xff,0xff,0x66,0xd8,0x00,0x00,0x00,0x00]
 0xff,0xff,0x66,0xd8,0x00,0x00,0x00,0x00
 
-# GFX10: ds_gws_init v255 gds            ; encoding: [0x00,0x00,0x66,0xd8,0x00,0xff,0x00,0x00]
-0x00,0x00,0x66,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_init v255 gds            ; encoding: [0x00,0x00,0x66,0xd8,0xff,0x00,0x00,0x00]
+0x00,0x00,0x66,0xd8,0xff,0x00,0x00,0x00
 
-# GFX10: ds_gws_init v255 offset:4660 gds ; encoding: [0x34,0x12,0x66,0xd8,0x00,0xff,0x00,0x00]
-0x34,0x12,0x66,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_init v255 offset:4660 gds ; encoding: [0x34,0x12,0x66,0xd8,0xff,0x00,0x00,0x00]
+0x34,0x12,0x66,0xd8,0xff,0x00,0x00,0x00
 
-# GFX10: ds_gws_init v255 offset:65535 gds ; encoding: [0xff,0xff,0x66,0xd8,0x00,0xff,0x00,0x00]
-0xff,0xff,0x66,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_init v255 offset:65535 gds ; encoding: [0xff,0xff,0x66,0xd8,0xff,0x00,0x00,0x00]
+0xff,0xff,0x66,0xd8,0xff,0x00,0x00,0x00
 
 # GFX10: ds_gws_sema_br v0 gds           ; encoding: [0x00,0x00,0x6e,0xd8,0x00,0x00,0x00,0x00]
 0x00,0x00,0x6e,0xd8,0x00,0x00,0x00,0x00
@@ -3614,14 +3614,14 @@
 # GFX10: ds_gws_sema_br v0 offset:65535 gds ; encoding: [0xff,0xff,0x6e,0xd8,0x00,0x00,0x00,0x00]
 0xff,0xff,0x6e,0xd8,0x00,0x00,0x00,0x00
 
-# GFX10: ds_gws_sema_br v255 gds         ; encoding: [0x00,0x00,0x6e,0xd8,0x00,0xff,0x00,0x00]
-0x00,0x00,0x6e,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_sema_br v255 gds         ; encoding: [0x00,0x00,0x6e,0xd8,0xff,0x00,0x00,0x00]
+0x00,0x00,0x6e,0xd8,0xff,0x00,0x00,0x00
 
-# GFX10: ds_gws_sema_br v255 offset:4660 gds ; encoding: [0x34,0x12,0x6e,0xd8,0x00,0xff,0x00,0x00]
-0x34,0x12,0x6e,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_sema_br v255 offset:4660 gds ; encoding: [0x34,0x12,0x6e,0xd8,0xff,0x00,0x00,0x00]
+0x34,0x12,0x6e,0xd8,0xff,0x00,0x00,0x00
 
-# GFX10: ds_gws_sema_br v255 offset:65535 gds ; encoding: [0xff,0xff,0x6e,0xd8,0x00,0xff,0x00,0x00]
-0xff,0xff,0x6e,0xd8,0x00,0xff,0x00,0x00
+# GFX10: ds_gws_sema_br v255 offset:65535 gds ; encoding: [0xff,0xff,0x6e,0xd8,0xff,0x00,0x00,0x00]
+0xff,0xff,0x6e,0xd8,0xff,0x00,0x00,0x00
 
 # GFX10: ds_gws_sema_p  gds              ; encoding: [0x00,0x00,0x72,0xd8,0x00,0x00,0x00,0x00]
 0x00,0x00,0x72,0xd8,0x00,0x00,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt
index 2d36a09f53436..64e8d17467bac 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_dasm_all.txt
@@ -2268,17 +2268,17 @@
 # CHECK: ds_gws_sema_release_all offset:4 gds    ; encoding: [0x04,0x00,0x31,0xd9,0x00,0x00,0x00,0x00]
 0x04,0x00,0x31,0xd9,0x00,0x00,0x00,0x00
 
-# CHECK: ds_gws_init v1 offset:65535 gds    ; encoding: [0xff,0xff,0x33,0xd9,0x00,0x01,0x00,0x00]
-0xff,0xff,0x33,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_init v1 offset:65535 gds    ; encoding: [0xff,0xff,0x33,0xd9,0x01,0x00,0x00,0x00]
+0xff,0xff,0x33,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_init v255 offset:65535 gds    ; encoding: [0xff,0xff,0x33,0xd9,0x00,0xff,0x00,0x00]
-0xff,0xff,0x33,0xd9,0x00,0xff,0x00,0x00
+# CHECK: ds_gws_init v255 offset:65535 gds    ; encoding: [0xff,0xff,0x33,0xd9,0xff,0x00,0x00,0x00]
+0xff,0xff,0x33,0xd9,0xff,0x00,0x00,0x00
 
-# CHECK: ds_gws_init v1 gds    ; encoding: [0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
-0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_init v1 gds    ; encoding: [0x00,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
+0x00,0x00,0x33,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_init v1 offset:4 gds    ; encoding: [0x04,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
-0x04,0x00,0x33,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_init v1 offset:4 gds    ; encoding: [0x04,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
+0x04,0x00,0x33,0xd9,0x01,0x00,0x00,0x00
 
 # CHECK: ds_gws_sema_v offset:65535 gds    ; encoding: [0xff,0xff,0x35,0xd9,0x00,0x00,0x00,0x00]
 0xff,0xff,0x35,0xd9,0x00,0x00,0x00,0x00
@@ -2289,17 +2289,17 @@
 # CHECK: ds_gws_sema_v offset:4 gds    ; encoding: [0x04,0x00,0x35,0xd9,0x00,0x00,0x00,0x00]
 0x04,0x00,0x35,0xd9,0x00,0x00,0x00,0x00
 
-# CHECK: ds_gws_sema_br v1 offset:65535 gds    ; encoding: [0xff,0xff,0x37,0xd9,0x00,0x01,0x00,0x00]
-0xff,0xff,0x37,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_sema_br v1 offset:65535 gds    ; encoding: [0xff,0xff,0x37,0xd9,0x01,0x00,0x00,0x00]
+0xff,0xff,0x37,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_sema_br v255 offset:65535 gds    ; encoding: [0xff,0xff,0x37,0xd9,0x00,0xff,0x00,0x00]
-0xff,0xff,0x37,0xd9,0x00,0xff,0x00,0x00
+# CHECK: ds_gws_sema_br v255 offset:65535 gds    ; encoding: [0xff,0xff,0x37,0xd9,0xff,0x00,0x00,0x00]
+0xff,0xff,0x37,0xd9,0xff,0x00,0x00,0x00
 
-# CHECK: ds_gws_sema_br v1 gds    ; encoding: [0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
-0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_sema_br v1 gds    ; encoding: [0x00,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
+0x00,0x00,0x37,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_sema_br v1 offset:4 gds    ; encoding: [0x04,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
-0x04,0x00,0x37,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_sema_br v1 offset:4 gds    ; encoding: [0x04,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
+0x04,0x00,0x37,0xd9,0x01,0x00,0x00,0x00
 
 # CHECK: ds_gws_sema_p offset:65535 gds    ; encoding: [0xff,0xff,0x39,0xd9,0x00,0x00,0x00,0x00]
 0xff,0xff,0x39,0xd9,0x00,0x00,0x00,0x00
@@ -2310,17 +2310,17 @@
 # CHECK: ds_gws_sema_p offset:4 gds    ; encoding: [0x04,0x00,0x39,0xd9,0x00,0x00,0x00,0x00]
 0x04,0x00,0x39,0xd9,0x00,0x00,0x00,0x00
 
-# CHECK: ds_gws_barrier v1 offset:65535 gds    ; encoding: [0xff,0xff,0x3b,0xd9,0x00,0x01,0x00,0x00]
-0xff,0xff,0x3b,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_barrier v1 offset:65535 gds    ; encoding: [0xff,0xff,0x3b,0xd9,0x01,0x00,0x00,0x00]
+0xff,0xff,0x3b,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_barrier v255 offset:65535 gds    ; encoding: [0xff,0xff,0x3b,0xd9,0x00,0xff,0x00,0x00]
-0xff,0xff,0x3b,0xd9,0x00,0xff,0x00,0x00
+# CHECK: ds_gws_barrier v255 offset:65535 gds    ; encoding: [0xff,0xff,0x3b,0xd9,0xff,0x00,0x00,0x00]
+0xff,0xff,0x3b,0xd9,0xff,0x00,0x00,0x00
 
-# CHECK: ds_gws_barrier v1 gds    ; encoding: [0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
-0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_barrier v1 gds    ; encoding: [0x00,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
+0x00,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_barrier v1 offset:4 gds    ; encoding: [0x04,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
-0x04,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_barrier v1 offset:4 gds    ; encoding: [0x04,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
+0x04,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00
 
 # CHECK: ds_consume v5 offset:65535    ; encoding: [0xff,0xff,0x7a,0xd9,0x00,0x00,0x00,0x05]
 0xff,0xff,0x7a,0xd9,0x00,0x00,0x00,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
index 566852b9e3276..31dcaf5c14125 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt
@@ -2412,17 +2412,17 @@
 # CHECK: ds_gws_sema_release_all offset:4 gds    ; encoding: [0x04,0x00,0x31,0xd9,0x00,0x00,0x00,0x00]
 0x04,0x00,0x31,0xd9,0x00,0x00,0x00,0x00
 
-# CHECK: ds_gws_init v1 offset:65535 gds    ; encoding: [0xff,0xff,0x33,0xd9,0x00,0x01,0x00,0x00]
-0xff,0xff,0x33,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_init v1 offset:65535 gds    ; encoding: [0xff,0xff,0x33,0xd9,0x01,0x00,0x00,0x00]
+0xff,0xff,0x33,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_init v255 offset:65535 gds    ; encoding: [0xff,0xff,0x33,0xd9,0x00,0xff,0x00,0x00]
-0xff,0xff,0x33,0xd9,0x00,0xff,0x00,0x00
+# CHECK: ds_gws_init v255 offset:65535 gds    ; encoding: [0xff,0xff,0x33,0xd9,0xff,0x00,0x00,0x00]
+0xff,0xff,0x33,0xd9,0xff,0x00,0x00,0x00
 
-# CHECK: ds_gws_init v1 gds    ; encoding: [0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
-0x00,0x00,0x33,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_init v1 gds    ; encoding: [0x00,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
+0x00,0x00,0x33,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_init v1 offset:4 gds    ; encoding: [0x04,0x00,0x33,0xd9,0x00,0x01,0x00,0x00]
-0x04,0x00,0x33,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_init v1 offset:4 gds    ; encoding: [0x04,0x00,0x33,0xd9,0x01,0x00,0x00,0x00]
+0x04,0x00,0x33,0xd9,0x01,0x00,0x00,0x00
 
 # CHECK: ds_gws_sema_v offset:65535 gds    ; encoding: [0xff,0xff,0x35,0xd9,0x00,0x00,0x00,0x00]
 0xff,0xff,0x35,0xd9,0x00,0x00,0x00,0x00
@@ -2433,17 +2433,17 @@
 # CHECK: ds_gws_sema_v offset:4 gds    ; encoding: [0x04,0x00,0x35,0xd9,0x00,0x00,0x00,0x00]
 0x04,0x00,0x35,0xd9,0x00,0x00,0x00,0x00
 
-# CHECK: ds_gws_sema_br v1 offset:65535 gds    ; encoding: [0xff,0xff,0x37,0xd9,0x00,0x01,0x00,0x00]
-0xff,0xff,0x37,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_sema_br v1 offset:65535 gds    ; encoding: [0xff,0xff,0x37,0xd9,0x01,0x00,0x00,0x00]
+0xff,0xff,0x37,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_sema_br v255 offset:65535 gds    ; encoding: [0xff,0xff,0x37,0xd9,0x00,0xff,0x00,0x00]
-0xff,0xff,0x37,0xd9,0x00,0xff,0x00,0x00
+# CHECK: ds_gws_sema_br v255 offset:65535 gds    ; encoding: [0xff,0xff,0x37,0xd9,0xff,0x00,0x00,0x00]
+0xff,0xff,0x37,0xd9,0xff,0x00,0x00,0x00
 
-# CHECK: ds_gws_sema_br v1 gds    ; encoding: [0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
-0x00,0x00,0x37,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_sema_br v1 gds    ; encoding: [0x00,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
+0x00,0x00,0x37,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_sema_br v1 offset:4 gds    ; encoding: [0x04,0x00,0x37,0xd9,0x00,0x01,0x00,0x00]
-0x04,0x00,0x37,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_sema_br v1 offset:4 gds    ; encoding: [0x04,0x00,0x37,0xd9,0x01,0x00,0x00,0x00]
+0x04,0x00,0x37,0xd9,0x01,0x00,0x00,0x00
 
 # CHECK: ds_gws_sema_p offset:65535 gds    ; encoding: [0xff,0xff,0x39,0xd9,0x00,0x00,0x00,0x00]
 0xff,0xff,0x39,0xd9,0x00,0x00,0x00,0x00
@@ -2454,17 +2454,17 @@
 # CHECK: ds_gws_sema_p offset:4 gds    ; encoding: [0x04,0x00,0x39,0xd9,0x00,0x00,0x00,0x00]
 0x04,0x00,0x39,0xd9,0x00,0x00,0x00,0x00
 
-# CHECK: ds_gws_barrier v1 offset:65535 gds    ; encoding: [0xff,0xff,0x3b,0xd9,0x00,0x01,0x00,0x00]
-0xff,0xff,0x3b,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_barrier v1 offset:65535 gds    ; encoding: [0xff,0xff,0x3b,0xd9,0x01,0x00,0x00,0x00]
+0xff,0xff,0x3b,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_barrier v255 offset:65535 gds    ; encoding: [0xff,0xff,0x3b,0xd9,0x00,0xff,0x00,0x00]
-0xff,0xff,0x3b,0xd9,0x00,0xff,0x00,0x00
+# CHECK: ds_gws_barrier v255 offset:65535 gds    ; encoding: [0xff,0xff,0x3b,0xd9,0xff,0x00,0x00,0x00]
+0xff,0xff,0x3b,0xd9,0xff,0x00,0x00,0x00
 
-# CHECK: ds_gws_barrier v1 gds    ; encoding: [0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
-0x00,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_barrier v1 gds    ; encoding: [0x00,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
+0x00,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00
 
-# CHECK: ds_gws_barrier v1 offset:4 gds    ; encoding: [0x04,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00]
-0x04,0x00,0x3b,0xd9,0x00,0x01,0x00,0x00
+# CHECK: ds_gws_barrier v1 offset:4 gds    ; encoding: [0x04,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00]
+0x04,0x00,0x3b,0xd9,0x01,0x00,0x00,0x00
 
 # CHECK: ds_consume v5 offset:65535    ; encoding: [0xff,0xff,0x7a,0xd9,0x00,0x00,0x00,0x05]
 0xff,0xff,0x7a,0xd9,0x00,0x00,0x00,0x05

From 63d00b19e5c4c92396c8221a10a4c67e2ba09f91 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Mon, 15 Jul 2019 14:46:23 +0000
Subject: [PATCH 104/451] [OPENMP]Add support for analysis of if clauses.

Summary:
Added support for analyzing the 'if' clauses of OpenMP directives, so that
uses of uninitialized variables in their conditions can be diagnosed.

Reviewers: NoQ

Subscribers: guansong, jfb, jdoerfert, caomhin, kkwli0, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64646

llvm-svn: 366068
---
 clang/include/clang/AST/OpenMPClause.h        |   7 +-
 clang/lib/AST/OpenMPClause.cpp                |  19 +
 clang/test/Analysis/cfg-openmp.cpp            | 532 ++++++++++--------
 clang/test/OpenMP/cancel_if_messages.cpp      |  10 +
 .../distribute_parallel_for_if_messages.cpp   |   7 +
 ...stribute_parallel_for_simd_if_messages.cpp |   7 +
 .../test/OpenMP/parallel_for_if_messages.cpp  |   7 +
 .../OpenMP/parallel_for_simd_if_messages.cpp  |   7 +
 clang/test/OpenMP/parallel_if_messages.cpp    |   7 +
 .../OpenMP/parallel_sections_if_messages.cpp  |   8 +
 clang/test/OpenMP/target_data_if_messages.cpp |   7 +
 .../OpenMP/target_enter_data_if_messages.cpp  |   7 +
 .../OpenMP/target_exit_data_if_messages.cpp   |   7 +
 clang/test/OpenMP/target_if_messages.cpp      |   7 +
 .../target_parallel_for_if_messages.cpp       |   7 +
 .../target_parallel_for_simd_if_messages.cpp  |   7 +
 .../OpenMP/target_parallel_if_messages.cpp    |   7 +
 clang/test/OpenMP/target_simd_if_messages.cpp |   7 +
 .../target_teams_distribute_if_messages.cpp   |   7 +
 ...ms_distribute_parallel_for_if_messages.cpp |   7 +
 ...stribute_parallel_for_simd_if_messages.cpp |   8 +
 ...rget_teams_distribute_simd_if_messages.cpp |   7 +
 .../test/OpenMP/target_teams_if_messages.cpp  |   7 +
 .../test/OpenMP/target_update_if_messages.cpp |   7 +
 clang/test/OpenMP/task_if_messages.cpp        |   7 +
 ...ms_distribute_parallel_for_if_messages.cpp |   8 +
 ...stribute_parallel_for_simd_if_messages.cpp |   8 +
 27 files changed, 494 insertions(+), 239 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index c6daf73a623bd..eadcc62a34575 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -501,11 +501,10 @@ class OMPIfClause : public OMPClause, public OMPClauseWithPreInit {
     return const_child_range(&Condition, &Condition + 1);
   }
 
-  child_range used_children() {
-    return child_range(child_iterator(), child_iterator());
-  }
+  child_range used_children();
   const_child_range used_children() const {
-    return const_child_range(const_child_iterator(), const_child_iterator());
+    auto Children = const_cast<OMPIfClause *>(this)->used_children();
+    return const_child_range(Children.begin(), Children.end());
   }
 
   static bool classof(const OMPClause *T) {
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 41520b380276c..9d8a7ebc3023e 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -209,6 +209,25 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C)
   return nullptr;
 }
 
+/// Returns the address of the original (non-captured) expression that
+/// initializes the clause's captured pre-init declaration, or nullptr if none.
+static Stmt **getAddrOfExprAsWritten(Stmt *S) {
+  if (!S)
+    return nullptr;
+  if (auto *DS = dyn_cast<DeclStmt>(S)) {
+    assert(DS->isSingleDecl() && "Only single expression must be captured.");
+    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(DS->getSingleDecl()))
+      return OED->getInitAddress();
+  }
+  return nullptr;
+}
+
+OMPClause::child_range OMPIfClause::used_children() {
+  if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt()))
+    return child_range(C, C + 1);
+  return child_range(&Condition, &Condition + 1);
+}
+
 OMPOrderedClause *OMPOrderedClause::Create(const ASTContext &C, Expr *Num,
                                            unsigned NumLoops,
                                            SourceLocation StartLoc,
diff --git a/clang/test/Analysis/cfg-openmp.cpp b/clang/test/Analysis/cfg-openmp.cpp
index 2f734d14b0216..b608606a83f8c 100644
--- a/clang/test/Analysis/cfg-openmp.cpp
+++ b/clang/test/Analysis/cfg-openmp.cpp
@@ -1,340 +1,402 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp | FileCheck %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp -fopenmp-version=45 | FileCheck %s
 
 // CHECK-LABEL:  void xxx(int argc)
 void xxx(int argc) {
 // CHECK:        [B1]
 // CHECK-NEXT:   1: int x;
-  int x;
-// CHECK-NEXT:   2: x
-// CHECK-NEXT:   3: [B1.2] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:   4: argc
-// CHECK-NEXT:   5: [B1.4] = [B1.3]
-// CHECK-NEXT:   6: #pragma omp atomic read
-// CHECK-NEXT:    [B1.5];
+// CHECK-NEXT:   2: int cond;
+  int x, cond;
+// CHECK-NEXT:   [[#ATOM:]]: x
+// CHECK-NEXT:   [[#ATOM+1]]: [B1.[[#ATOM]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:   [[#ATOM+2]]: argc
+// CHECK-NEXT:   [[#ATOM+3]]: [B1.[[#ATOM+2]]] = [B1.[[#ATOM+1]]]
+// CHECK-NEXT:   [[#ATOM+4]]: #pragma omp atomic read
+// CHECK-NEXT:   [B1.[[#ATOM+3]]];
 #pragma omp atomic read
   argc = x;
-// CHECK-NEXT:   7: x
-// CHECK-NEXT:   8: [B1.7] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:   9: argc
-// CHECK-NEXT:  10: [B1.9] = [B1.8]
-// CHECK-NEXT:  11: #pragma omp critical
-// CHECK-NEXT:    [B1.10];
+// CHECK-NEXT:   [[#CRIT:]]: x
+// CHECK-NEXT:   [[#CRIT+1]]: [B1.[[#CRIT]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:   [[#CRIT+2]]: argc
+// CHECK-NEXT:   [[#CRIT+3]]: [B1.[[#CRIT+2]]] = [B1.[[#CRIT+1]]]
+// CHECK-NEXT:   [[#CRIT+4]]: #pragma omp critical
+// CHECK-NEXT:   [B1.[[#CRIT+3]]];
 #pragma omp critical
   argc = x;
-// CHECK-NEXT:  12: x
-// CHECK-NEXT:  13: [B1.12] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  14: argc
-// CHECK-NEXT:  15: [B1.14] = [B1.13]
-// CHECK-NEXT:  16: #pragma omp distribute parallel for
+// CHECK-NEXT:  [[#DPF:]]: x
+// CHECK-NEXT:  [[#DPF+1]]: [B1.[[#DPF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DPF+2]]: argc
+// CHECK-NEXT:  [[#DPF+3]]: [B1.[[#DPF+2]]] = [B1.[[#DPF+1]]]
+// CHECK-NEXT:  [[#DPF+4]]: cond
+// CHECK-NEXT:  [[#DPF+5]]: [B1.[[#DPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DPF+6]]: [B1.[[#DPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#DPF+7]]: #pragma omp distribute parallel for if(parallel: cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.15];
-#pragma omp distribute parallel for
+// CHECK-NEXT:        [B1.[[#DPF+3]]];
+#pragma omp distribute parallel for if(parallel:cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  17: x
-// CHECK-NEXT:  18: [B1.17] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  19: argc
-// CHECK-NEXT:  20: [B1.19] = [B1.18]
-// CHECK-NEXT:  21: #pragma omp distribute parallel for simd
+// CHECK-NEXT:  [[#DPFS:]]: x
+// CHECK-NEXT:  [[#DPFS+1]]: [B1.[[#DPFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DPFS+2]]: argc
+// CHECK-NEXT:  [[#DPFS+3]]: [B1.[[#DPFS+2]]] = [B1.[[#DPFS+1]]]
+// CHECK-NEXT:  [[#DPFS+4]]: cond
+// CHECK-NEXT:  [[#DPFS+5]]: [B1.[[#DPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DPFS+6]]: [B1.[[#DPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#DPFS+7]]: #pragma omp distribute parallel for simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.20];
-#pragma omp distribute parallel for simd
+// CHECK-NEXT:        [B1.[[#DPFS+3]]];
+#pragma omp distribute parallel for simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  22: x
-// CHECK-NEXT:  23: [B1.22] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  24: argc
-// CHECK-NEXT:  25: [B1.24] = [B1.23]
-// CHECK-NEXT:  26: #pragma omp distribute simd
+// CHECK-NEXT:  [[#DS:]]: x
+// CHECK-NEXT:  [[#DS+1]]: [B1.[[#DS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DS+2]]: argc
+// CHECK-NEXT:  [[#DS+3]]: [B1.[[#DS+2]]] = [B1.[[#DS+1]]]
+// CHECK-NEXT:  [[#DS+4]]: #pragma omp distribute simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.25];
+// CHECK-NEXT:        [B1.[[#DS+3]]];
 #pragma omp distribute simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  27: x
-// CHECK-NEXT:  28: [B1.27] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  29: argc
-// CHECK-NEXT:  30: [B1.29] = [B1.28]
-// CHECK-NEXT:  31: #pragma omp for
+// CHECK-NEXT:  [[#FOR:]]: x
+// CHECK-NEXT:  [[#FOR+1]]: [B1.[[#FOR]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#FOR+2]]: argc
+// CHECK-NEXT:  [[#FOR+3]]: [B1.[[#FOR+2]]] = [B1.[[#FOR+1]]]
+// CHECK-NEXT:  [[#FOR+4]]: #pragma omp for
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.30];
+// CHECK-NEXT:        [B1.[[#FOR+3]]];
 #pragma omp for
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  32: x
-// CHECK-NEXT:  33: [B1.32] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  34: argc
-// CHECK-NEXT:  35: [B1.34] = [B1.33]
-// CHECK-NEXT:  36: #pragma omp for simd
+// CHECK-NEXT:  [[#FS:]]: x
+// CHECK-NEXT:  [[#FS+1]]: [B1.[[#FS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#FS+2]]: argc
+// CHECK-NEXT:  [[#FS+3]]: [B1.[[#FS+2]]] = [B1.[[#FS+1]]]
+// CHECK-NEXT:  [[#FS+4]]: #pragma omp for simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.35];
+// CHECK-NEXT:        [B1.[[#FS+3]]];
 #pragma omp for simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  37: x
-// CHECK-NEXT:  38: [B1.37] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  39: argc
-// CHECK-NEXT:  40: [B1.39] = [B1.38]
-// CHECK-NEXT:  41: #pragma omp master
-// CHECK-NEXT:    [B1.40];
+// CHECK-NEXT:  [[#MASTER:]]: x
+// CHECK-NEXT:  [[#MASTER+1]]: [B1.[[#MASTER]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#MASTER+2]]: argc
+// CHECK-NEXT:  [[#MASTER+3]]: [B1.[[#MASTER+2]]] = [B1.[[#MASTER+1]]]
+// CHECK-NEXT:  [[#MASTER+4]]: #pragma omp master
+// CHECK-NEXT:    [B1.[[#MASTER+3]]];
 #pragma omp master
   argc = x;
-// CHECK-NEXT:  42: x
-// CHECK-NEXT:  43: [B1.42] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  44: argc
-// CHECK-NEXT:  45: [B1.44] = [B1.43]
-// CHECK-NEXT:  46: #pragma omp ordered
-// CHECK-NEXT:    [B1.45];
-// CHECK-NEXT:  47: #pragma omp for ordered
+// CHECK-NEXT:  [[#ORD:]]: x
+// CHECK-NEXT:  [[#ORD+1]]: [B1.[[#ORD]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#ORD+2]]: argc
+// CHECK-NEXT:  [[#ORD+3]]: [B1.[[#ORD+2]]] = [B1.[[#ORD+1]]]
+// CHECK-NEXT:  [[#ORD+4]]: #pragma omp ordered
+// CHECK-NEXT:    [B1.[[#ORD+3]]];
+// CHECK-NEXT:  [[#ORD+5]]: #pragma omp for ordered
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i) {
-// CHECK-NEXT:[B1.46]    }
+// CHECK-NEXT:[B1.[[#ORD+4]]]    }
 #pragma omp for ordered
   for (int i = 0; i < 10; ++i) {
 #pragma omp ordered
     argc = x;
   }
-// CHECK-NEXT:  48: x
-// CHECK-NEXT:  49: [B1.48] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  50: argc
-// CHECK-NEXT:  51: [B1.50] = [B1.49]
-// CHECK-NEXT:  52: #pragma omp parallel for
+// CHECK-NEXT:  [[#PF:]]: x
+// CHECK-NEXT:  [[#PF+1]]: [B1.[[#PF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PF+2]]: argc
+// CHECK-NEXT:  [[#PF+3]]: [B1.[[#PF+2]]] = [B1.[[#PF+1]]]
+// CHECK-NEXT:  [[#PF+4]]: cond
+// CHECK-NEXT:  [[#PF+5]]: [B1.[[#PF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PF+6]]: [B1.[[#PF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#PF+7]]: #pragma omp parallel for if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.51];
-#pragma omp parallel for
+// CHECK-NEXT:        [B1.[[#PF+3]]];
+#pragma omp parallel for if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  53: x
-// CHECK-NEXT:  54: [B1.53] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  55: argc
-// CHECK-NEXT:  56: [B1.55] = [B1.54]
-// CHECK-NEXT:  57: #pragma omp parallel for simd
+// CHECK-NEXT:  [[#PFS:]]: x
+// CHECK-NEXT:  [[#PFS+1]]: [B1.[[#PFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PFS+2]]: argc
+// CHECK-NEXT:  [[#PFS+3]]: [B1.[[#PFS+2]]] = [B1.[[#PFS+1]]]
+// CHECK-NEXT:  [[#PFS+4]]: cond
+// CHECK-NEXT:  [[#PFS+5]]: [B1.[[#PFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PFS+6]]: [B1.[[#PFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#PFS+7]]: #pragma omp parallel for simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.56];
-#pragma omp parallel for simd
+// CHECK-NEXT:        [B1.[[#PFS+3]]];
+#pragma omp parallel for simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  58: x
-// CHECK-NEXT:  59: [B1.58] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  60: argc
-// CHECK-NEXT:  61: [B1.60] = [B1.59]
-// CHECK-NEXT:  62: #pragma omp parallel
-// CHECK-NEXT:    [B1.61];
-#pragma omp parallel
+// CHECK-NEXT:  [[#PAR:]]: x
+// CHECK-NEXT:  [[#PAR+1]]: [B1.[[#PAR]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PAR+2]]: argc
+// CHECK-NEXT:  [[#PAR+3]]: [B1.[[#PAR+2]]] = [B1.[[#PAR+1]]]
+// CHECK-NEXT:  [[#PAR+4]]: cond
+// CHECK-NEXT:  [[#PAR+5]]: [B1.[[#PAR+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PAR+6]]: [B1.[[#PAR+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#PAR+7]]: #pragma omp parallel if(cond)
+// CHECK-NEXT:    [B1.[[#PAR+3]]];
+#pragma omp parallel if(cond)
   argc = x;
-// CHECK-NEXT:  63: x
-// CHECK-NEXT:  64: [B1.63] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  65: argc
-// CHECK-NEXT:  66: [B1.65] = [B1.64]
-// CHECK-NEXT:  67: #pragma omp parallel sections
+// CHECK-NEXT:  [[#PSECT:]]: x
+// CHECK-NEXT:  [[#PSECT+1]]: [B1.[[#PSECT]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PSECT+2]]: argc
+// CHECK-NEXT:  [[#PSECT+3]]: [B1.[[#PSECT+2]]] = [B1.[[#PSECT+1]]]
+// CHECK-NEXT:  [[#PSECT+4]]: cond
+// CHECK-NEXT:  [[#PSECT+5]]: [B1.[[#PSECT+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PSECT+6]]: [B1.[[#PSECT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#PSECT+7]]: #pragma omp parallel sections if(cond)
 // CHECK-NEXT:    {
-// CHECK-NEXT:        [B1.66];
+// CHECK-NEXT:        [B1.[[#PSECT+3]]];
 // CHECK-NEXT:    }
-#pragma omp parallel sections
+#pragma omp parallel sections if(cond)
   {
     argc = x;
   }
-// CHECK-NEXT:  68: x
-// CHECK-NEXT:  69: [B1.68] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  70: argc
-// CHECK-NEXT:  71: [B1.70] = [B1.69]
-// CHECK-NEXT:  72: #pragma omp simd
+// CHECK-NEXT:  [[#SIMD:]]: x
+// CHECK-NEXT:  [[#SIMD+1]]: [B1.[[#SIMD]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#SIMD+2]]: argc
+// CHECK-NEXT:  [[#SIMD+3]]: [B1.[[#SIMD+2]]] = [B1.[[#SIMD+1]]]
+// CHECK-NEXT:  [[#SIMD+4]]: #pragma omp simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.71];
+// CHECK-NEXT:        [B1.[[#SIMD+3]]];
 #pragma omp simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  73: x
-// CHECK-NEXT:  74: [B1.73] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  75: argc
-// CHECK-NEXT:  76: [B1.75] = [B1.74]
-// CHECK-NEXT:  77: #pragma omp single
-// CHECK-NEXT:    [B1.76];
+// CHECK-NEXT:  [[#SINGLE:]]: x
+// CHECK-NEXT:  [[#SINGLE+1]]: [B1.[[#SINGLE]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#SINGLE+2]]: argc
+// CHECK-NEXT:  [[#SINGLE+3]]: [B1.[[#SINGLE+2]]] = [B1.[[#SINGLE+1]]]
+// CHECK-NEXT:  [[#SINGLE+4]]: #pragma omp single
+// CHECK-NEXT:    [B1.[[#SINGLE+3]]];
 #pragma omp single
   argc = x;
-// CHECK-NEXT:  78: x
-// CHECK-NEXT:  79: [B1.78] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  80: argc
-// CHECK-NEXT:  81: [B1.80] = [B1.79]
-// CHECK-NEXT:  82: #pragma omp target depend(in : argc)
-// CHECK-NEXT:    [B1.81];
+// CHECK-NEXT:  [[#TARGET:]]: x
+// CHECK-NEXT:  [[#TARGET+1]]: [B1.[[#TARGET]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TARGET+2]]: argc
+// CHECK-NEXT:  [[#TARGET+3]]: [B1.[[#TARGET+2]]] = [B1.[[#TARGET+1]]]
+// CHECK-NEXT:  [[#TARGET+4]]: cond
+// CHECK-NEXT:  [[#TARGET+5]]: [B1.[[#TARGET+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TARGET+6]]: [B1.[[#TARGET+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TARGET+7]]: #pragma omp target depend(in : argc) if(cond)
+// CHECK-NEXT:    [B1.[[#TARGET+3]]];
 #pragma omp target depend(in \
-                          : argc)
+                          : argc) if(cond)
   argc = x;
-// CHECK-NEXT:  83: x
-// CHECK-NEXT:  84: [B1.83] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  85: argc
-// CHECK-NEXT:  86: [B1.85] = [B1.84]
-// CHECK-NEXT:  87: #pragma omp target parallel for
+// CHECK-NEXT:  [[#TPF:]]: x
+// CHECK-NEXT:  [[#TPF+1]]: [B1.[[#TPF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TPF+2]]: argc
+// CHECK-NEXT:  [[#TPF+3]]: [B1.[[#TPF+2]]] = [B1.[[#TPF+1]]]
+// CHECK-NEXT:  [[#TPF+4]]: cond
+// CHECK-NEXT:  [[#TPF+5]]: [B1.[[#TPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TPF+6]]: [B1.[[#TPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TPF+7]]: #pragma omp target parallel for if(parallel: cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.86];
-#pragma omp target parallel for
+// CHECK-NEXT:        [B1.[[#TPF+3]]];
+#pragma omp target parallel for if(parallel:cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  88: x
-// CHECK-NEXT:  89: [B1.88] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  90: argc
-// CHECK-NEXT:  91: [B1.90] = [B1.89]
-// CHECK-NEXT:  92: #pragma omp target parallel for simd
+// CHECK-NEXT:  [[#TPFS:]]: x
+// CHECK-NEXT:  [[#TPFS+1]]: [B1.[[#TPFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TPFS+2]]: argc
+// CHECK-NEXT:  [[#TPFS+3]]: [B1.[[#TPFS+2]]] = [B1.[[#TPFS+1]]]
+// CHECK-NEXT:  [[#TPFS+4]]: cond
+// CHECK-NEXT:  [[#TPFS+5]]: [B1.[[#TPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TPFS+6]]: [B1.[[#TPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TPFS+7]]: #pragma omp target parallel for simd if(target: cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.91];
-#pragma omp target parallel for simd
+// CHECK-NEXT:        [B1.[[#TPFS+3]]];
+#pragma omp target parallel for simd if(target:cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  93: x
-// CHECK-NEXT:  94: [B1.93] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  95: argc
-// CHECK-NEXT:  96: [B1.95] = [B1.94]
-// CHECK-NEXT:  97: #pragma omp target parallel
-// CHECK-NEXT:    [B1.96];
-#pragma omp target parallel
+// CHECK-NEXT:  [[#TP:]]: x
+// CHECK-NEXT:  [[#TP+1]]: [B1.[[#TP]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TP+2]]: argc
+// CHECK-NEXT:  [[#TP+3]]: [B1.[[#TP+2]]] = [B1.[[#TP+1]]]
+// CHECK-NEXT:  [[#TP+4]]: cond
+// CHECK-NEXT:  [[#TP+5]]: [B1.[[#TP+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TP+6]]: [B1.[[#TP+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TP+7]]: #pragma omp target parallel if(cond)
+// CHECK-NEXT:    [B1.[[#TP+3]]];
+#pragma omp target parallel if(cond)
   argc = x;
-// CHECK-NEXT:  98: x
-// CHECK-NEXT:  99: [B1.98] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 100: argc
-// CHECK-NEXT: 101: [B1.100] = [B1.99]
-// CHECK-NEXT: 102: #pragma omp target simd
+// CHECK-NEXT:  [[#TSIMD:]]: x
+// CHECK-NEXT:  [[#TSIMD+1]]: [B1.[[#TSIMD]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TSIMD+2]]: argc
+// CHECK-NEXT:  [[#TSIMD+3]]: [B1.[[#TSIMD+2]]] = [B1.[[#TSIMD+1]]]
+// CHECK-NEXT:  [[#TSIMD+4]]: cond
+// CHECK-NEXT:  [[#TSIMD+5]]: [B1.[[#TSIMD+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TSIMD+6]]: [B1.[[#TSIMD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TSIMD+7]]: #pragma omp target simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.101];
-#pragma omp target simd
+// CHECK-NEXT:        [B1.[[#TSIMD+3]]];
+#pragma omp target simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 103: x
-// CHECK-NEXT: 104: [B1.103] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 105: argc
-// CHECK-NEXT: 106: [B1.105] = [B1.104]
-// CHECK-NEXT: 107: #pragma omp target teams distribute
+// CHECK-NEXT:  [[#TTD:]]: x
+// CHECK-NEXT:  [[#TTD+1]]: [B1.[[#TTD]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTD+2]]: argc
+// CHECK-NEXT:  [[#TTD+3]]: [B1.[[#TTD+2]]] = [B1.[[#TTD+1]]]
+// CHECK-NEXT:  [[#TTD+4]]: cond
+// CHECK-NEXT:  [[#TTD+5]]: [B1.[[#TTD+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTD+6]]: [B1.[[#TTD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TTD+7]]: #pragma omp target teams distribute if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.106];
-#pragma omp target teams distribute
+// CHECK-NEXT:        [B1.[[#TTD+3]]];
+#pragma omp target teams distribute if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 108: x
-// CHECK-NEXT: 109: [B1.108] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 110: argc
-// CHECK-NEXT: 111: [B1.110] = [B1.109]
-// CHECK-NEXT: 112: #pragma omp target teams distribute parallel for
+// CHECK-NEXT:  [[#TTDPF:]]: x
+// CHECK-NEXT:  [[#TTDPF+1]]: [B1.[[#TTDPF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDPF+2]]: argc
+// CHECK-NEXT:  [[#TTDPF+3]]: [B1.[[#TTDPF+2]]] = [B1.[[#TTDPF+1]]]
+// CHECK-NEXT:  [[#TTDPF+4]]: cond
+// CHECK-NEXT:  [[#TTDPF+5]]: [B1.[[#TTDPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDPF+6]]: [B1.[[#TTDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TTDPF+7]]: #pragma omp target teams distribute parallel for if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.111];
-#pragma omp target teams distribute parallel for
+// CHECK-NEXT:        [B1.[[#TTDPF+3]]];
+#pragma omp target teams distribute parallel for if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 113: x
-// CHECK-NEXT: 114: [B1.113] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 115: argc
-// CHECK-NEXT: 116: [B1.115] = [B1.114]
-// CHECK-NEXT: 117: #pragma omp target teams distribute parallel for simd
+// CHECK-NEXT:  [[#TTDPFS:]]: x
+// CHECK-NEXT:  [[#TTDPFS+1]]: [B1.[[#TTDPFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDPFS+2]]: argc
+// CHECK-NEXT:  [[#TTDPFS+3]]: [B1.[[#TTDPFS+2]]] = [B1.[[#TTDPFS+1]]]
+// CHECK-NEXT:  [[#TTDPFS+4]]: cond
+// CHECK-NEXT:  [[#TTDPFS+5]]: [B1.[[#TTDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDPFS+6]]: [B1.[[#TTDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TTDPFS+7]]: #pragma omp target teams distribute parallel for simd if(parallel: cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.116];
-#pragma omp target teams distribute parallel for simd
+// CHECK-NEXT:        [B1.[[#TTDPFS+3]]];
+#pragma omp target teams distribute parallel for simd if(parallel:cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 118: x
-// CHECK-NEXT: 119: [B1.118] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 120: argc
-// CHECK-NEXT: 121: [B1.120] = [B1.119]
-// CHECK-NEXT: 122: #pragma omp target teams distribute simd
+// CHECK-NEXT:  [[#TTDS:]]: x
+// CHECK-NEXT:  [[#TTDS+1]]: [B1.[[#TTDS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDS+2]]: argc
+// CHECK-NEXT:  [[#TTDS+3]]: [B1.[[#TTDS+2]]] = [B1.[[#TTDS+1]]]
+// CHECK-NEXT:  [[#TTDS+4]]: cond
+// CHECK-NEXT:  [[#TTDS+5]]: [B1.[[#TTDS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDS+6]]: [B1.[[#TTDS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TTDS+7]]: #pragma omp target teams distribute simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.121];
-#pragma omp target teams distribute simd
+// CHECK-NEXT:        [B1.[[#TTDS+3]]];
+#pragma omp target teams distribute simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 123: x
-// CHECK-NEXT: 124: [B1.123] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 125: argc
-// CHECK-NEXT: 126: [B1.125] = [B1.124]
-// CHECK-NEXT: 127: #pragma omp target teams
-// CHECK-NEXT:    [B1.126];
-#pragma omp target teams
+// CHECK-NEXT:  [[#TT:]]: x
+// CHECK-NEXT:  [[#TT+1]]: [B1.[[#TT]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TT+2]]: argc
+// CHECK-NEXT:  [[#TT+3]]: [B1.[[#TT+2]]] = [B1.[[#TT+1]]]
+// CHECK-NEXT:  [[#TT+4]]: cond
+// CHECK-NEXT:  [[#TT+5]]: [B1.[[#TT+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TT+6]]: [B1.[[#TT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TT+7]]: #pragma omp target teams if(cond)
+// CHECK-NEXT:    [B1.[[#TT+3]]];
+#pragma omp target teams if(cond)
   argc = x;
-// CHECK-NEXT: 128: #pragma omp target update to(x)
-#pragma omp target update to(x)
-// CHECK-NEXT: 129: x
-// CHECK-NEXT: 130: [B1.129] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 131: argc
-// CHECK-NEXT: 132: [B1.131] = [B1.130]
+// CHECK-NEXT: [[#TU:]]: cond
+// CHECK-NEXT: [[#TU+1]]: [B1.[[#TU]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: [[#TU+2]]: [B1.[[#TU+1]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT: [[#TU+3]]: #pragma omp target update to(x) if(target update: cond)
+#pragma omp target update to(x) if(target update:cond)
+// CHECK-NEXT:  [[#TASK:]]: x
+// CHECK-NEXT:  [[#TASK+1]]: [B1.[[#TASK]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TASK+2]]: argc
+// CHECK-NEXT:  [[#TASK+3]]: [B1.[[#TASK+2]]] = [B1.[[#TASK+1]]]
+// CHECK-NEXT:  [[#TASK+4]]: cond
+// CHECK-NEXT:  [[#TASK+5]]: [B1.[[#TASK+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TASK+6]]: [B1.[[#TASK+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TASK+7]]: #pragma omp task if(cond)
+// CHECK-NEXT:    [B1.[[#TASK+3]]];
+#pragma omp task if(cond)
   argc = x;
-// CHECK-NEXT: 133: x
-// CHECK-NEXT: 134: [B1.133] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 135: argc
-// CHECK-NEXT: 136: [B1.135] = [B1.134]
-// CHECK-NEXT: 137: #pragma omp task
-// CHECK-NEXT:    [B1.136];
-#pragma omp task
-  argc = x;
-// CHECK-NEXT: 138: x
-// CHECK-NEXT: 139: [B1.138] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 140: argc
-// CHECK-NEXT: 141: [B1.140] = [B1.139]
-// CHECK-NEXT: 142: #pragma omp taskgroup
-// CHECK-NEXT:    [B1.141];
+// CHECK-NEXT:  [[#TG:]]: x
+// CHECK-NEXT:  [[#TG+1]]: [B1.[[#TG]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TG+2]]: argc
+// CHECK-NEXT:  [[#TG+3]]: [B1.[[#TG+2]]] = [B1.[[#TG+1]]]
+// CHECK-NEXT:  [[#TG+4]]: #pragma omp taskgroup
+// CHECK-NEXT:    [B1.[[#TG+3]]];
 #pragma omp taskgroup
   argc = x;
-// CHECK-NEXT: 143: x
-// CHECK-NEXT: 144: [B1.143] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 145: argc
-// CHECK-NEXT: 146: [B1.145] = [B1.144]
-// CHECK-NEXT: 147: #pragma omp taskloop
+// CHECK-NEXT:  [[#TL:]]: x
+// CHECK-NEXT:  [[#TL+1]]: [B1.[[#TL]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TL+2]]: argc
+// CHECK-NEXT:  [[#TL+3]]: [B1.[[#TL+2]]] = [B1.[[#TL+1]]]
+// CHECK-NEXT:  [[#TL+4]]: cond
+// CHECK-NEXT:  [[#TL+5]]: [B1.[[#TL+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TL+6]]: [B1.[[#TL+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TL+7]]: #pragma omp taskloop if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.146];
-#pragma omp taskloop
+// CHECK-NEXT:        [B1.[[#TL+3]]];
+#pragma omp taskloop if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 148: x
-// CHECK-NEXT: 149: [B1.148] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 150: argc
-// CHECK-NEXT: 151: [B1.150] = [B1.149]
-// CHECK-NEXT: 152: #pragma omp taskloop simd
+// CHECK-NEXT:  [[#TLS:]]: x
+// CHECK-NEXT:  [[#TLS+1]]: [B1.[[#TLS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TLS+2]]: argc
+// CHECK-NEXT:  [[#TLS+3]]: [B1.[[#TLS+2]]] = [B1.[[#TLS+1]]]
+// CHECK-NEXT:  [[#TLS+4]]: cond
+// CHECK-NEXT:  [[#TLS+5]]: [B1.[[#TLS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TLS+6]]: [B1.[[#TLS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TLS+7]]: #pragma omp taskloop simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.151];
-#pragma omp taskloop simd
+// CHECK-NEXT:        [B1.[[#TLS+3]]];
+#pragma omp taskloop simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 153: x
-// CHECK-NEXT: 154: [B1.153] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 155: argc
-// CHECK-NEXT: 156: [B1.155] = [B1.154]
-// CHECK-NEXT: 157: #pragma omp teams distribute parallel for
+// CHECK-NEXT:  [[#TDPF:]]: x
+// CHECK-NEXT:  [[#TDPF+1]]: [B1.[[#TDPF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDPF+2]]: argc
+// CHECK-NEXT:  [[#TDPF+3]]: [B1.[[#TDPF+2]]] = [B1.[[#TDPF+1]]]
+// CHECK-NEXT:  [[#TDPF+4]]: cond
+// CHECK-NEXT:  [[#TDPF+5]]: [B1.[[#TDPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDPF+6]]: [B1.[[#TDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TDPF+7]]: #pragma omp teams distribute parallel for if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.156];
-// CHECK-NEXT: 158: #pragma omp target
+// CHECK-NEXT:        [B1.[[#TDPF+3]]];
+// CHECK-NEXT:  [[#TDPF+8]]: #pragma omp target
 #pragma omp target
-#pragma omp teams distribute parallel for
+#pragma omp teams distribute parallel for if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:[B1.157] 159: x
-// CHECK-NEXT: 160: [B1.159] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 161: argc
-// CHECK-NEXT: 162: [B1.161] = [B1.160]
-// CHECK-NEXT: 163: #pragma omp teams distribute parallel for simd
+// CHECK-NEXT:  [B1.[[#TDPF+7]]] [[#TDPFS:]]: x
+// CHECK-NEXT:  [[#TDPFS+1]]: [B1.[[#TDPFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDPFS+2]]: argc
+// CHECK-NEXT:  [[#TDPFS+3]]: [B1.[[#TDPFS+2]]] = [B1.[[#TDPFS+1]]]
+// CHECK-NEXT:  [[#TDPFS+4]]: cond
+// CHECK-NEXT:  [[#TDPFS+5]]: [B1.[[#TDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDPFS+6]]: [B1.[[#TDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TDPFS+7]]: #pragma omp teams distribute parallel for simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.162];
-// CHECK-NEXT: 164: #pragma omp target
+// CHECK-NEXT:        [B1.[[#TDPFS+3]]];
+// CHECK-NEXT:  [[#TDPFS+8]]: #pragma omp target
 #pragma omp target
-#pragma omp teams distribute parallel for simd
+#pragma omp teams distribute parallel for simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:[B1.163] 165: x
-// CHECK-NEXT: 166: [B1.165] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 167: argc
-// CHECK-NEXT: 168: [B1.167] = [B1.166]
-// CHECK-NEXT: 169: #pragma omp teams distribute simd
+// CHECK-NEXT:  [B1.[[#TDPFS+7]]] [[#TDS:]]: x
+// CHECK-NEXT:  [[#TDS+1]]: [B1.[[#TDS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDS+2]]: argc
+// CHECK-NEXT:  [[#TDS+3]]: [B1.[[#TDS+2]]] = [B1.[[#TDS+1]]]
+// CHECK-NEXT:  [[#TDS+4]]: #pragma omp teams distribute simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.168];
-// CHECK-NEXT: 170: #pragma omp target
+// CHECK-NEXT:        [B1.[[#TDS+3]]];
+// CHECK-NEXT:  [[#TDS+5]]: #pragma omp target
 #pragma omp target
 #pragma omp teams distribute simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:[B1.169] 171: x
-// CHECK-NEXT: 172: [B1.171] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 173: argc
-// CHECK-NEXT: 174: [B1.173] = [B1.172]
-// CHECK-NEXT: 175: #pragma omp teams
-// CHECK-NEXT:    [B1.174];
-// CHECK-NEXT: 176: #pragma omp target
+// CHECK-NEXT:  [B1.[[#TDS+4]]] [[#TEAMS:]]: x
+// CHECK-NEXT:  [[#TEAMS+1]]: [B1.[[#TEAMS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TEAMS+2]]: argc
+// CHECK-NEXT:  [[#TEAMS+3]]: [B1.[[#TEAMS+2]]] = [B1.[[#TEAMS+1]]]
+// CHECK-NEXT:  [[#TEAMS+4]]: #pragma omp teams
+// CHECK-NEXT:    [B1.[[#TEAMS+3]]];
+// CHECK-NEXT:  [[#TEAMS+5]]: #pragma omp target
 #pragma omp target
 #pragma omp teams
   argc = x;
-// CHECK-NEXT:[B1.175]   Preds
+// CHECK-NEXT:  [B1.[[#TEAMS+4]]]   Preds
 }
 
diff --git a/clang/test/OpenMP/cancel_if_messages.cpp b/clang/test/OpenMP/cancel_if_messages.cpp
index 3d629c927e907..222087ca9e61b 100644
--- a/clang/test/OpenMP/cancel_if_messages.cpp
+++ b/clang/test/OpenMP/cancel_if_messages.cpp
@@ -9,6 +9,16 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel
+  {
+#pragma omp cancel parallel if (cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+    for (int i = 0; i < 10; ++i)
+      ;
+  }
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
index a06ff2377c043..e628a15c3ab44 100644
--- a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
index 7769272026e6b..6cf18faf0a87f 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_for_if_messages.cpp b/clang/test/OpenMP/parallel_for_if_messages.cpp
index 32f9ef3a7defa..56bb06be0cc71 100644
--- a/clang/test/OpenMP/parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
index aa1e302d04242..bab9339d49174 100644
--- a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_if_messages.cpp b/clang/test/OpenMP/parallel_if_messages.cpp
index 7f802a9e4236a..f095e66bbfa5e 100644
--- a/clang/test/OpenMP/parallel_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_sections_if_messages.cpp b/clang/test/OpenMP/parallel_sections_if_messages.cpp
index 8d36b6d5d3086..b7c92df4f30df 100644
--- a/clang/test/OpenMP/parallel_sections_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_sections_if_messages.cpp
@@ -9,6 +9,14 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel sections if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  {
+    ;
+  }
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_data_if_messages.cpp b/clang/test/OpenMP/target_data_if_messages.cpp
index c6f9b4b34eeea..29f898c6d9fa7 100644
--- a/clang/test/OpenMP/target_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_data_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target data map(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_enter_data_if_messages.cpp b/clang/test/OpenMP/target_enter_data_if_messages.cpp
index 5123d607dc6a1..21019e9ae7f8c 100644
--- a/clang/test/OpenMP/target_enter_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_enter_data_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target enter data map(to:argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_exit_data_if_messages.cpp b/clang/test/OpenMP/target_exit_data_if_messages.cpp
index c45b32ff3fe75..7b2385c16cd21 100644
--- a/clang/test/OpenMP/target_exit_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_exit_data_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target exit data map(from: argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_if_messages.cpp b/clang/test/OpenMP/target_if_messages.cpp
index e6b667f2cffbf..f381e9eb91ebd 100644
--- a/clang/test/OpenMP/target_if_messages.cpp
+++ b/clang/test/OpenMP/target_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_if_messages.cpp
index 445dc1775b0f2..a5a181b9d273a 100644
--- a/clang/test/OpenMP/target_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
index b0da8017019f0..ef9a2089d1087 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target parallel for simd if(parallel: cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_if_messages.cpp b/clang/test/OpenMP/target_parallel_if_messages.cpp
index 460e0c8655f09..ac498a7108b0d 100644
--- a/clang/test/OpenMP/target_parallel_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_simd_if_messages.cpp b/clang/test/OpenMP/target_simd_if_messages.cpp
index 94d2ab308daa2..5f3e9e3910ac6 100644
--- a/clang/test/OpenMP/target_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
index fd1ffb08cbe8c..499cd3ac58050 100644
--- a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams distribute if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
index e1114028b6877..6df23076472ec 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
index 59c75893a1714..e88c1f1dbbfff 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
@@ -9,6 +9,14 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams distribute parallel for simd if (parallel \
+                                                          : cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
index 7134a8394cbb8..53af6e759d21e 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams distribute simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_if_messages.cpp b/clang/test/OpenMP/target_teams_if_messages.cpp
index 8d3d690d631fa..4bc82a349398d 100644
--- a/clang/test/OpenMP/target_teams_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_update_if_messages.cpp b/clang/test/OpenMP/target_update_if_messages.cpp
index 9ded332b04eb9..d967713e456fb 100644
--- a/clang/test/OpenMP/target_update_if_messages.cpp
+++ b/clang/test/OpenMP/target_update_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target update to(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/task_if_messages.cpp b/clang/test/OpenMP/task_if_messages.cpp
index 305af22149d85..2d47b32b9a153 100644
--- a/clang/test/OpenMP/task_if_messages.cpp
+++ b/clang/test/OpenMP/task_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp task if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
index 6f724b050178a..b76599d41a46a 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
@@ -9,6 +9,14 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target
+#pragma omp teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
index c01e6e87e39a5..39a0b326383a2 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
@@ -9,6 +9,14 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target
+#pragma omp teams distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}

From cb238de456ba2db699d969858eddbc0971c8592c Mon Sep 17 00:00:00 2001
From: Dmitry Venikov <quolyk@gmail.com>
Date: Mon, 15 Jul 2019 14:47:45 +0000
Subject: [PATCH 105/451] [PatternMatch] Implement matching code for LibFunc

Summary: Provides m_LibFunc pattern that can be used to match LibFuncs.

Reviewers: spatel, hfinkel, efriedma, lebedev.ri

Reviewed By: lebedev.ri

Subscribers: lebedev.ri, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D42047

llvm-svn: 366069
---
 llvm/include/llvm/IR/PatternMatch.h | 52 +++++++++++++++++++++++++++++
 llvm/unittests/IR/PatternMatch.cpp  | 38 ++++++++++++++++++++-
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 0f03d7cc56b88..b45a6b0bc8b8f 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -30,6 +30,7 @@
 
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/InstrTypes.h"
@@ -1812,6 +1813,57 @@ template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
   return Signum_match<Val_t>(V);
 }
 
+/// Matcher for a call to the library function F, looked up through TLI.
+struct LibFunc_match {
+  LibFunc F;
+  TargetLibraryInfo TLI;
+  /// Stores the function to look for and a by-value copy of the library info.
+  LibFunc_match(LibFunc Func, TargetLibraryInfo TargetLI)
+  : F(Func), TLI(TargetLI) {}
+  /// True iff V is a non-nobuiltin CallInst whose callee TLI maps to F and has.
+  template <typename OpTy> bool match(OpTy *V) {
+    LibFunc LF;
+    if (const auto *CI = dyn_cast<CallInst>(V))
+      if (!CI->isNoBuiltin() && CI->getCalledFunction() &&
+          TLI.getLibFunc(*CI->getCalledFunction(), LF) &&
+          LF == F && TLI.has(LF))
+        return true;
+    return false;
+  }
+};
+
+/// Result types of the m_LibFunc overloads that also match arguments: a
+/// LibFunc_match and-combined with one Argument_match per matched argument.
+template <typename T0 = void, typename T1 = void, typename T2 = void>
+struct m_LibFunc_Ty;
+template <typename T0> struct m_LibFunc_Ty<T0> {
+  using Ty = match_combine_and<LibFunc_match, Argument_match<T0>>;
+};
+template <typename T0, typename T1> struct m_LibFunc_Ty<T0, T1> {
+  using Ty =
+  match_combine_and<typename m_LibFunc_Ty<T0>::Ty,
+  Argument_match<T1>>;
+};
+
+/// Match LibFunc calls, optionally matching up to two arguments, like this:
+/// m_LibFunc<LibFunc_tan>(TLI, m_Value(X))
+template <LibFunc F>
+inline LibFunc_match m_LibFunc(TargetLibraryInfo TLI) {
+  return LibFunc_match(F, TLI);
+}
+
+template <LibFunc F, typename T0>
+inline typename m_LibFunc_Ty<T0>::Ty
+m_LibFunc(const TargetLibraryInfo TLI, const T0 &Op0) {
+  return m_CombineAnd(m_LibFunc<F>(TLI), m_Argument<0>(Op0));
+}
+
+template <LibFunc F, typename T0, typename T1>
+inline typename m_LibFunc_Ty<T0, T1>::Ty
+m_LibFunc(const TargetLibraryInfo TLI, const T0 &Op0, const T1 &Op1) {
+  return m_CombineAnd(m_LibFunc<F>(TLI, Op0), m_Argument<1>(Op1));
+}
+
 } // end namespace PatternMatch
 } // end namespace llvm
 
diff --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp
index 600494fba2671..3f8e691801951 100644
--- a/llvm/unittests/IR/PatternMatch.cpp
+++ b/llvm/unittests/IR/PatternMatch.cpp
@@ -35,13 +35,15 @@ struct PatternMatchTest : ::testing::Test {
   Function *F;
   BasicBlock *BB;
   IRBuilder<NoFolder> IRB;
+  TargetLibraryInfoImpl TLII;
+  TargetLibraryInfo TLI;
 
   PatternMatchTest()
       : M(new Module("PatternMatchTestModule", Ctx)),
         F(Function::Create(
             FunctionType::get(Type::getVoidTy(Ctx), /* IsVarArg */ false),
             Function::ExternalLinkage, "f", M.get())),
-        BB(BasicBlock::Create(Ctx, "entry", F)), IRB(BB) {}
+        BB(BasicBlock::Create(Ctx, "entry", F)), IRB(BB), TLI(TLII) {}
 };
 
 TEST_F(PatternMatchTest, OneUse) {
@@ -1008,6 +1010,40 @@ TEST_F(PatternMatchTest, FloatingPointFNeg) {
   EXPECT_FALSE(match(V3, m_FNeg(m_Value(Match))));
 }
 
+TEST_F(PatternMatchTest, LibFunc) {
+  Type *FltTy = IRB.getFloatTy();
+  Value *One = ConstantFP::get(FltTy, 1.0);
+  Value *Two = ConstantFP::get(FltTy, 2.0);
+  Value *MatchOne, *MatchTwo;
+
+  StringRef TanName = TLI.getName(LibFunc_tan);
+  FunctionCallee TanCallee = M->getOrInsertFunction(TanName, FltTy, FltTy);
+  CallInst *Tan = IRB.CreateCall(TanCallee, One, TanName);
+
+  StringRef PowName = TLI.getName(LibFunc_pow);
+  FunctionCallee PowCallee = M->getOrInsertFunction(PowName, FltTy, FltTy, FltTy);
+  CallInst *Pow = IRB.CreateCall(PowCallee, {One, Two}, PowName);
+  // Matching on the library function alone.
+  EXPECT_TRUE(match(Tan, m_LibFunc<LibFunc_tan>(TLI)));
+  EXPECT_FALSE(match(Tan, m_LibFunc<LibFunc_pow>(TLI)));
+  EXPECT_FALSE(match(Pow, m_LibFunc<LibFunc_tan>(TLI)));
+  // Matching the function and binding its single argument.
+  EXPECT_TRUE(match(Tan, m_LibFunc<LibFunc_tan>(TLI, m_Value(MatchOne))));
+  EXPECT_EQ(One, MatchOne);
+  EXPECT_FALSE(match(Tan, m_LibFunc<LibFunc_sin>(TLI, m_Value())));
+  // Matching the function and binding both of its arguments.
+  EXPECT_TRUE(match(Pow, m_LibFunc<LibFunc_pow>(TLI, m_Value(MatchOne),
+                                                m_Value(MatchTwo))));
+  EXPECT_EQ(One, MatchOne);
+  EXPECT_EQ(Two, MatchTwo);
+  EXPECT_FALSE(match(Pow, m_LibFunc<LibFunc_fminf>(TLI, m_Value(), m_Value())));
+  // After disableAllFunctions() on TLII, none of the patterns may match.
+  TLII.disableAllFunctions();
+  EXPECT_FALSE(match(Tan, m_LibFunc<LibFunc_tan>(TLI)));
+  EXPECT_FALSE(match(Tan, m_LibFunc<LibFunc_tan>(TLI, m_Value())));
+  EXPECT_FALSE(match(Pow, m_LibFunc<LibFunc_pow>(TLI, m_Value(), m_Value())));
+}
+
 template <typename T> struct MutableConstTest : PatternMatchTest { };
 
 typedef ::testing::Types<std::tuple<Value*, Instruction*>,

From 4e34a85aa2e74154a9dd48f32d5dc75cb1e7c5bd Mon Sep 17 00:00:00 2001
From: Johan Vikstrom <jvikstrom@google.com>
Date: Mon, 15 Jul 2019 15:08:27 +0000
Subject: [PATCH 106/451] [clangd] Fix duplicate highlighting tokens appearing
 in initializer lists.

Summary: The RecursiveASTVisitor sometimes visits exprs in initializer lists twice. Added deduplication to prevent duplicate highlighting tokens from appearing. Done by sorting and a linear search.

Reviewers: hokein, sammccall, ilya-biryukov

Subscribers: MaskRay, jkorous, mgrang, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64634

llvm-svn: 366070
---
 clang-tools-extra/clangd/SemanticHighlighting.cpp         | 8 ++++++++
 .../clangd/unittests/SemanticHighlightingTests.cpp        | 7 +++++++
 2 files changed, 15 insertions(+)

diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index 1f8fa9541a6b3..d64472d8fdb1d 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -31,6 +31,14 @@ class HighlightingTokenCollector
   std::vector<HighlightingToken> collectTokens() {
     Tokens.clear();
     TraverseAST(Ctx);
+    // The RecursiveASTVisitor may visit expressions in initializer lists more
+    // than once, so sort the tokens and erase the adjacent duplicates.
+    llvm::sort(Tokens,
+               [](const HighlightingToken &L, const HighlightingToken &R) {
+                 return std::tie(L.R, L.Kind) < std::tie(R.R, R.Kind);
+               });
+    auto Last = std::unique(Tokens.begin(), Tokens.end());
+    Tokens.erase(Last, Tokens.end());
     return Tokens;
   }
 
diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
index 7ba35fee6d85f..3a1b1c3e7057f 100644
--- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -166,6 +166,13 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
         $Variable[[AA]].$Field[[E]].$Field[[C]];
         $Class[[A]]::$Variable[[S]] = 90;
       }
+    )cpp",
+    R"cpp(
+      struct $Class[[AA]] {
+        int $Field[[A]];
+      }
+      int $Variable[[B]];
+      $Class[[AA]] $Variable[[A]]{$Variable[[B]]};
     )cpp"};
   for (const auto &TestCase : TestCases) {
     checkHighlightings(TestCase);

From 5153b1723a62e9545a07a6878d99156b8bed3652 Mon Sep 17 00:00:00 2001
From: Dmitry Preobrazhensky <dmitry.preobrazhensky@amd.com>
Date: Mon, 15 Jul 2019 15:12:16 +0000
Subject: [PATCH 107/451] [AMDGPU][MC][GFX9][GFX10] Added support of
 GET_DOORBELL message

Reviewers: artem.tamazov, arsenm

Differential Revision: https://reviews.llvm.org/D64729

llvm-svn: 366071
---
 llvm/docs/AMDGPU/gfx10_msg.rst                  |  2 ++
 llvm/docs/AMDGPU/gfx9_msg.rst                   |  2 ++
 llvm/lib/Target/AMDGPU/SIDefines.h              |  1 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp |  2 +-
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 11 ++++++++---
 llvm/test/MC/AMDGPU/sopp-gfx10.s                | 17 +++++++++++++++++
 llvm/test/MC/AMDGPU/sopp-gfx9.s                 | 12 +++++++++++-
 7 files changed, 42 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/sopp-gfx10.s

diff --git a/llvm/docs/AMDGPU/gfx10_msg.rst b/llvm/docs/AMDGPU/gfx10_msg.rst
index d56daa5c240dc..ef531a14db19f 100644
--- a/llvm/docs/AMDGPU/gfx10_msg.rst
+++ b/llvm/docs/AMDGPU/gfx10_msg.rst
@@ -53,6 +53,7 @@ Each message type supports specific operations:
     \                            GS_OP_EMIT                     2            Optional
     \                            GS_OP_EMIT_CUT                 3            Optional
     MSG_GS_ALLOC_REQ  9          \-                             \-           \-
+    MSG_GET_DOORBELL  10         \-                             \-           \-
     MSG_SYSMSG        15         SYSMSG_OP_ECC_ERR_INTERRUPT    1            \-
     \                            SYSMSG_OP_REG_RD               2            \-
     \                            SYSMSG_OP_HOST_TRAP_ACK        3            \-
@@ -65,6 +66,7 @@ Examples:
 
     s_sendmsg 0x12
     s_sendmsg sendmsg(MSG_INTERRUPT)
+    s_sendmsg sendmsg(MSG_GET_DOORBELL)
     s_sendmsg sendmsg(2, GS_OP_CUT)
     s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT)
     s_sendmsg sendmsg(MSG_GS, 2)
diff --git a/llvm/docs/AMDGPU/gfx9_msg.rst b/llvm/docs/AMDGPU/gfx9_msg.rst
index 898848532c179..14dff9050e658 100644
--- a/llvm/docs/AMDGPU/gfx9_msg.rst
+++ b/llvm/docs/AMDGPU/gfx9_msg.rst
@@ -53,6 +53,7 @@ Each message type supports specific operations:
     \                            GS_OP_EMIT                     2            Optional
     \                            GS_OP_EMIT_CUT                 3            Optional
     MSG_GS_ALLOC_REQ  9          \-                             \-           \-
+    MSG_GET_DOORBELL  10         \-                             \-           \-
     MSG_SYSMSG        15         SYSMSG_OP_ECC_ERR_INTERRUPT    1            \-
     \                            SYSMSG_OP_REG_RD               2            \-
     \                            SYSMSG_OP_HOST_TRAP_ACK        3            \-
@@ -65,6 +66,7 @@ Examples:
 
     s_sendmsg 0x12
     s_sendmsg sendmsg(MSG_INTERRUPT)
+    s_sendmsg sendmsg(MSG_GET_DOORBELL)
     s_sendmsg sendmsg(2, GS_OP_CUT)
     s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT)
     s_sendmsg sendmsg(MSG_GS, 2)
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index a329dd27a6ea9..a0e1ec6ac235b 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -263,6 +263,7 @@ enum Id { // Message ID, width(4) [3:0].
   ID_GS,
   ID_GS_DONE,
   ID_GS_ALLOC_REQ = 9,
+  ID_GET_DOORBELL = 10,
   ID_SYSMSG = 15,
   ID_GAPS_LAST_, // Indicate that sequence has gaps.
   ID_GAPS_FIRST_ = ID_INTERRUPT,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 8b2e91a8ee374..075e08986c0c0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -23,7 +23,7 @@ const char* const IdSymbolic[] = {
   nullptr,
   nullptr,
   "MSG_GS_ALLOC_REQ",
-  nullptr,
+  "MSG_GET_DOORBELL",
   nullptr,
   nullptr,
   nullptr,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 05df975cdd476..e90f40e6abea1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -731,9 +731,14 @@ static bool isValidMsgId(int64_t MsgId) {
 }
 
 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
-  return Strict ?
-         isValidMsgId(MsgId) && (MsgId != ID_GS_ALLOC_REQ || isGFX9(STI) || isGFX10(STI)) :
-         0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
+  // Outside strict mode, accept any non-negative value of ID_WIDTH_ bits.
+  if (!Strict)
+    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
+
+  // GS_ALLOC_REQ and GET_DOORBELL are only accepted on GFX9 and GFX10.
+  if (MsgId == ID_GS_ALLOC_REQ || MsgId == ID_GET_DOORBELL)
+    return isGFX9(STI) || isGFX10(STI);
+  return isValidMsgId(MsgId);
 }
 
 StringRef getMsgName(int64_t MsgId) {
diff --git a/llvm/test/MC/AMDGPU/sopp-gfx10.s b/llvm/test/MC/AMDGPU/sopp-gfx10.s
new file mode 100644
index 0000000000000..f597f26e57a8b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/sopp-gfx10.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
+
+//===----------------------------------------------------------------------===//
+// s_sendmsg
+//===----------------------------------------------------------------------===//
+
+s_sendmsg 9
+// GFX10: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) ; encoding: [0x09,0x00,0x90,0xbf]
+
+s_sendmsg sendmsg(MSG_GS_ALLOC_REQ)
+// GFX10: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) ; encoding: [0x09,0x00,0x90,0xbf]
+
+s_sendmsg 10
+// GFX10: s_sendmsg sendmsg(MSG_GET_DOORBELL) ; encoding: [0x0a,0x00,0x90,0xbf]
+
+s_sendmsg sendmsg(MSG_GET_DOORBELL)
+// GFX10: s_sendmsg sendmsg(MSG_GET_DOORBELL) ; encoding: [0x0a,0x00,0x90,0xbf]
diff --git a/llvm/test/MC/AMDGPU/sopp-gfx9.s b/llvm/test/MC/AMDGPU/sopp-gfx9.s
index a02f8e1104232..dbf5983cbcd60 100644
--- a/llvm/test/MC/AMDGPU/sopp-gfx9.s
+++ b/llvm/test/MC/AMDGPU/sopp-gfx9.s
@@ -70,8 +70,18 @@ s_waitcnt vmcnt(62) lgkmcnt(14)
 s_waitcnt vmcnt(62) expcnt(6) lgkmcnt(14)
 // GFX9: s_waitcnt vmcnt(62) expcnt(6) lgkmcnt(14) ; encoding: [0x6e,0xce,0x8c,0xbf]
 
+//===----------------------------------------------------------------------===//
+// s_sendmsg
+//===----------------------------------------------------------------------===//
+
 s_sendmsg 9
-// GCN: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) ; encoding: [0x09,0x00,0x90,0xbf]
+// GFX9: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) ; encoding: [0x09,0x00,0x90,0xbf]
 
 s_sendmsg sendmsg(MSG_GS_ALLOC_REQ)
 // GFX9: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) ; encoding: [0x09,0x00,0x90,0xbf]
+
+s_sendmsg 10
+// GFX9: s_sendmsg sendmsg(MSG_GET_DOORBELL) ; encoding: [0x0a,0x00,0x90,0xbf]
+
+s_sendmsg sendmsg(MSG_GET_DOORBELL)
+// GFX9: s_sendmsg sendmsg(MSG_GET_DOORBELL) ; encoding: [0x0a,0x00,0x90,0xbf]

From 62cc16dac2056ba962d7987d10c91d39677f5487 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 15 Jul 2019 15:14:09 +0000
Subject: [PATCH 108/451] gn build: Add a note on how to locally tell git to
 ignore build dir

llvm-svn: 366072
---
 llvm/utils/gn/README.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/utils/gn/README.rst b/llvm/utils/gn/README.rst
index 7ffa144aafb36..bb0e0c7a5d5a1 100644
--- a/llvm/utils/gn/README.rst
+++ b/llvm/utils/gn/README.rst
@@ -51,6 +51,8 @@ GN only works in the monorepo layout.
    you can manually pass these parameters and not use the wrapper if you
    prefer.)
 
+#. ``echo out >> .git/info/exclude`` to tell git to ignore files below ``out``.
+
 #. ``ninja -C out/gn check-lld`` to build all prerequisites for and run the LLD
    tests.
 

From 1d91f94f095b0fb52c66282cb5fde0ada2f190a4 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Mon, 15 Jul 2019 15:16:57 +0000
Subject: [PATCH 109/451] [clangd] Fix doc

llvm-svn: 366073
---
 clang-tools-extra/docs/clangd/Installation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/docs/clangd/Installation.rst b/clang-tools-extra/docs/clangd/Installation.rst
index 1552efce1e627..caf49a75d1105 100644
--- a/clang-tools-extra/docs/clangd/Installation.rst
+++ b/clang-tools-extra/docs/clangd/Installation.rst
@@ -324,7 +324,7 @@ symlink it (or copy it) to the root of your source tree, if they are different.
 
 ::
 
-  $ ln -s ~/myproject/compile_commands.json ~/myproject-build/
+  $ ln -s ~/myproject-build/compile_commands.json ~/myproject/
 
 :raw-html:`</details>`
 

From fd08dcb9db0df6dc1aaf329f790cc4a7af9e0a91 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Mon, 15 Jul 2019 15:34:05 +0000
Subject: [PATCH 110/451] [AMDGPU] fixed scheduler crash in gfx908

For some reason scheduler can send down an SUnit without an
instruction.

Differential Revision: https://reviews.llvm.org/D64709

llvm-svn: 366074
---
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp    |  4 ++--
 .../AMDGPU/power-sched-no-instr-sunit.mir     | 22 +++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index a6027e22d527c..1eb9b83456c53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -765,8 +765,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
   FillMFMAShadowMutation(const SIInstrInfo *tii) : TII(tii) {}
 
   bool isSALU(const SUnit *SU) const {
-    const MachineInstr &MI = *SU->getInstr();
-    return TII->isSALU(MI) && !MI.isTerminator();
+    const MachineInstr *Instr = SU->getInstr(); // Null when SU has no instr.
+    return Instr && TII->isSALU(*Instr) && !Instr->isTerminator();
   }
 
   bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
diff --git a/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir b/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir
new file mode 100644
index 0000000000000..ed648ece0c71a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/power-sched-no-instr-sunit.mir
@@ -0,0 +1,22 @@
+# RUN: llc -march=amdgcn -mcpu=gfx908 %s -run-pass=post-RA-sched -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: test
+# GCN: V_MFMA_F32_32X32X1F32
+# GCN: S_BARRIER
+# GCN: S_NOP 0
+# GCN: V_ACCVGPR_READ_B32
+# GCN: BUFFER_STORE_DWORD_OFFEN
+---
+name:            test
+body:             |
+  bb.0.entry:
+
+    $sgpr6 = S_MOV_B32 $sgpr5
+    $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+    $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
+    S_BARRIER
+    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32 undef $vgpr0, undef $vgpr0, 0, 0, 0, 2, implicit $exec
+    $vgpr0 = V_ACCVGPR_READ_B32 $agpr31, implicit $exec
+    BUFFER_STORE_DWORD_OFFEN killed $vgpr0, undef $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr6, 0, 0, 0, 0, 0, implicit $exec
+
+...

From a13cca41c5cefd3762f57e1992b238117926c735 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Mon, 15 Jul 2019 15:36:37 +0000
Subject: [PATCH 111/451] [ORC] Start adding ORCv1 to ORCv2 transition tips to
 the ORCv2 doc.

llvm-svn: 366075
---
 ...2DesignAndImplementation.rst => ORCv2.rst} | 159 ++++++++++++++++--
 llvm/docs/index.rst                           |   6 +-
 2 files changed, 152 insertions(+), 13 deletions(-)
 rename llvm/docs/{ORCv2DesignAndImplementation.rst => ORCv2.rst} (64%)

diff --git a/llvm/docs/ORCv2DesignAndImplementation.rst b/llvm/docs/ORCv2.rst
similarity index 64%
rename from llvm/docs/ORCv2DesignAndImplementation.rst
rename to llvm/docs/ORCv2.rst
index a99d2aa48bf38..4daa12f560869 100644
--- a/llvm/docs/ORCv2DesignAndImplementation.rst
+++ b/llvm/docs/ORCv2.rst
@@ -16,7 +16,7 @@ Use-cases
 =========
 
 ORC provides a modular API for building JIT compilers. There are a range
-of use cases for such an API:
+of use cases for such an API. For example:
 
 1. The LLVM tutorials use a simple ORC-based JIT class to execute expressions
 compiled from a toy languge: Kaleidoscope.
@@ -56,11 +56,11 @@ ORC provides the following features:
   deferring compilation until first call.
 
 - *Support for custom compilers and program representations*. Clients can supply
-   custom compilers for each symbol that they define in their JIT session. ORC
-   will run the user-supplied compiler when the a definition of a symbol is
-   needed. ORC is actually fully language agnostic: LLVM IR is not treated
-   specially, and is supported via the same wrapper mechanism (the
-   ``MaterializationUnit`` class) that is used for custom compilers.
+  custom compilers for each symbol that they define in their JIT session. ORC
+  will run the user-supplied compiler when a definition of a symbol is
+  needed. ORC is actually fully language agnostic: LLVM IR is not treated
+  specially, and is supported via the same wrapper mechanism (the
+  ``MaterializationUnit`` class) that is used for custom compilers.
 
 - *Concurrent JIT'd code* and *concurrent compilation*. JIT'd code may spawn
   multiple threads, and may re-enter the JIT (e.g. for lazy compilation)
@@ -311,10 +311,129 @@ Supporting Custom Compilers
 
 TBD.
 
-Low Level (MCJIT style) Use
-===========================
+Transitioning from ORCv1 to ORCv2
+=================================
 
-TBD.
+Since LLVM 7.0, new ORC development has focused on adding support for concurrent
+compilation. In order to enable concurrency new APIs were introduced
+(ExecutionSession, JITDylib, etc.) and new implementations of existing layers
+were written. In LLVM 8.0 the old layer implementations, which do not support
+concurrency, were renamed (with a "Legacy" prefix), but remained in tree.  In
+LLVM 9.0 we have added a deprecation warning for the old layers and utilities,
+and in LLVM 10.0 the old layers and utilities will be removed.
+
+Clients currently using the legacy (ORCv1) layers and utilities will usually
+find it easy to transition to the newer (ORCv2) variants. Most of the ORCv1
+layers and utilities have ORCv2 counterparts [2]_ that can be
+substituted. However there are some differences between ORCv1 and ORCv2 to be
+aware of:
+
+  1. All JIT stacks now need an ExecutionSession instance which manages the
+     string pool, error reporting, synchronization, and symbol lookup.
+
+  2. ORCv2 uses uniqued strings (``SymbolStringPtr`` instances) to reduce memory
+     overhead and improve lookup performance. To get a uniqued string, call
+     ``intern`` on your ExecutionSession instance:
+
+     .. code-block:: c++
+
+       ExecutionSession ES;
+
+       /// ...
+
+       auto MainSymbolName = ES.intern("main");
+
+  3. Program representations (Modules, Object Files, etc.) are no longer added
+     *to* layers. Instead they are added *to* JITDylibs *by* layers. The layer
+     determines how the program representation will be compiled if it is needed.
+     The JITDylib provides the symbol table, enforces linkage rules (e.g.
+     rejecting duplicate definitions), and synchronizes concurrent compiles.
+
+     Most ORCv1 clients (or MCJIT clients wanting to try out ORCv2) should
+     simply add code to the default *main* JITDylib provided by the
+     ExecutionSession:
+
+     .. code-block:: c++
+
+       ExecutionSession ES;
+       RTDyldObjectLinkingLayer ObjLinkingLayer(
+         ES, []() { return llvm::make_unique<SectionMemoryManager>(); });
+       IRCompileLayer CompileLayer(ES, ObjLinkingLayer, SimpleIRCompiler(TM));
+
+       auto M = loadModule(...);
+
+       if (auto Err = CompileLayer.add(ES.getMainJITDylib(), M))
+         return Err;
+
+  4. IR layers require ThreadSafeModule instances, rather than
+     std::unique_ptr<Module>s. A ThreadSafeModule instance is a pair of a
+     std::unique_ptr<Module> and a ThreadSafeContext, which is in turn a
+     pair of a std::unique_ptr<LLVMContext> and a lock. This allows the JIT
+     to ensure that the LLVMContext for a module is locked before the module
+     is accessed. Multiple ThreadSafeModules may share a ThreadSafeContext
+     value, but in that case the modules will not be able to be compiled
+     concurrently [3]_.
+
+     ThreadSafeContexts may be constructed explicitly:
+
+     .. code-block:: c++
+
+       // ThreadSafeContext shared between two modules.
+       ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>());
+       ThreadSafeModule TSM1(
+         llvm::make_unique<Module>("M1", *TSCtx.getContext()), TSCtx);
+       ThreadSafeModule TSM2(
+         llvm::make_unique<Module>("M2", *TSCtx.getContext()), TSCtx);
+
+     Alternatively, they can be created implicitly by passing a new LLVMContext
+     to the ThreadSafeModule constructor:
+
+     .. code-block:: c++
+
+       // Constructing a ThreadSafeModule (and implicitly a ThreadSafeContext)
+       // from a pair of a Module and a Context.
+       auto Ctx = llvm::make_unique<LLVMContext>();
+       auto M = llvm::make_unique<Module>("M", *Ctx);
+       return ThreadSafeModule(std::move(M), std::move(Ctx));
+
+  5. The symbol resolution and lookup scheme have been fundamentally changed.
+     Symbol lookup has been removed from the layer interface. Instead,
+     symbols are looked up via the ``ExecutionSession::lookup`` method by
+     scanning a list of JITDylibs.
+
+     SymbolResolvers have been removed entirely. Resolution rules now follow the
+     linkage relationship between JITDylibs. For example, to resolve a reference
+     to a symbol *F* from a module *M* that has been added to JITDylib *J1* we
+     would first search for a definition of *F* in *J1* then (if no definition
+     was found) search each of the JITDylibs that *J1* links against.
+
+     While the new resolution scheme is, strictly speaking, less flexible than
+     the old scheme of customizable resolvers, this has not yet led to problems
+     in practice. Instead, using standard linker rules has removed a lot of
+     boilerplate while providing correct [4]_ behavior for common and weak symbols.
+
+     One notable difference is in exposing in-process symbols to the JIT. To
+     support this (without requiring the set of symbols to be enumerated up
+     front), JITDylibs allow for a *GeneratorFunction* to be attached to
+     generate new definitions upon lookup. Reflecting the process's symbols into
+     the JIT can be done by writing:
+
+     .. code-block:: c++
+
+       ExecutionSession ES;
+       const DataLayout &DL = ...;
+
+       {
+         auto ProcessSymbolsGenerator =
+           DynamicLibrarySearchGenerator::GetForCurrentProcess(DL.getGlobalPrefix());
+         if (!ProcessSymbolsGenerator)
+           return ProcessSymbolsGenerator.takeError();
+         ES.getMainJITDylib().setGenerator(std::move(*ProcessSymbolsGenerator));
+       }
+
+  6. Module removal is not yet supported. There is no equivalent of the
+     layer concept removeModule/removeObject methods. Work on resource tracking
+     and removal in ORCv2 is ongoing.
 
 Future Features
 ===============
@@ -322,4 +441,24 @@ Future Features
 TBD: Speculative compilation. Object Caches.
 
 .. [1] Formats/architectures vary in terms of supported features. MachO and
-       ELF tend to have better support than COFF. Patches very welcome!
\ No newline at end of file
+       ELF tend to have better support than COFF. Patches very welcome!
+
+.. [2] The ``LazyEmittingLayer``, ``RemoteObjectClientLayer`` and
+       ``RemoteObjectServerLayer`` do not have counterparts in the new
+       system. In the case of ``LazyEmittingLayer`` it was simply no longer
+       needed: in ORCv2, deferring compilation until symbols are looked up is
+       the default. The removal of ``RemoteObjectClientLayer`` and
+       ``RemoteObjectServerLayer`` means that JIT stacks can no longer be split
+       across processes; however, this functionality appears not to have been
+       used.
+
+.. [3] Sharing ThreadSafeModules in a concurrent compilation can be dangerous:
+       if interdependent modules are loaded on the same context, but compiled
+       on different threads, a deadlock may occur (with each compile waiting for
+       the other(s) to complete, and the other(s) unable to proceed because the
+       context is locked).
+
+.. [4] Mostly. Weak definitions are handled correctly within dylibs, but if
+       multiple dylibs provide a weak definition of a symbol each will end up
+       with its own definition (similar to how weak symbols in Windows DLLs
+       behave). This will be fixed in the future.
\ No newline at end of file
diff --git a/llvm/docs/index.rst b/llvm/docs/index.rst
index e63bfc6ee179c..cf1476789047a 100644
--- a/llvm/docs/index.rst
+++ b/llvm/docs/index.rst
@@ -89,7 +89,7 @@ intermediate LLVM representation.
    GetElementPtr
    Frontend/PerformanceTips
    MCJITDesignAndImplementation
-   ORCv2DesignAndImplementation
+   ORCv2
    CodeOfConduct
    CompileCudaWithLLVM
    ReportingGuide
@@ -383,9 +383,9 @@ For API clients and LLVM developers.
 :doc:`MCJITDesignAndImplementation`
    Describes the inner workings of MCJIT execution engine.
 
-:doc:`ORCv2DesignAndImplementation`
+:doc:`ORCv2`
    Describes the design and implementation of the ORC APIs, including some
-   usage examples.
+   usage examples, and a guide for users transitioning from ORCv1 to ORCv2.
 
 :doc:`BranchWeightMetadata`
    Provides information about Branch Prediction Information.

From e5086481b65a126464e6579fd2240ba691758414 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 15 Jul 2019 15:42:41 +0000
Subject: [PATCH 112/451] fix unnamed bitfield issue and add tests for
 __builtin_preserve_access_index intrinsic

This is a followup patch for https://reviews.llvm.org/D61809.
Handle unnamed bitfield properly and add more test cases.

Fixed the unnamed bitfield issue. The unnamed bitfield is ignored
by debug info, so we need to ignore such a struct/union member
when we try to get the member index in the debug info.

D61809 contains two test cases, but they are not enough: they do not
check the generated IR at a fine-grained level, and there are no
semantic checking tests.
This patch added unit tests for both code gen and semantics checking for
the new intrinsic.

Signed-off-by: Yonghong Song <yhs@fb.com>
llvm-svn: 366076
---
 clang/lib/CodeGen/CGExpr.cpp                  |  21 ++-
 clang/lib/CodeGen/CodeGenFunction.h           |   3 +
 .../CodeGen/builtin-preserve-access-index.c   | 177 ++++++++++++++++++
 .../test/Sema/builtin-preserve-access-index.c |  13 ++
 4 files changed, 212 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/builtin-preserve-access-index.c
 create mode 100644 clang/test/Sema/builtin-preserve-access-index.c

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 62d930ca8c455..25d2424eb85ad 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3892,6 +3892,23 @@ LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) {
   return EmitLValueForField(LambdaLV, Field);
 }
 
+/// Translate a field's AST index into its index among the members recorded in
+/// debug info, which omits unnamed bitfields that precede the field.
+unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec,
+                                             unsigned FieldIndex) {
+  unsigned I = 0, Skipped = 0;
+
+  for (auto F : Rec->getDefinition()->fields()) {
+    if (I == FieldIndex)
+      break;
+    if (F->isUnnamedBitfield())
+      Skipped++;
+    I++;
+  }
+
+  return FieldIndex - Skipped;
+}
+
 /// Get the address of a zero-sized field within a record. The resulting
 /// address doesn't necessarily have the right type.
 static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base,
@@ -3931,7 +3948,7 @@ static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base,
       CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
 
   return CGF.Builder.CreatePreserveStructAccessIndex(
-      base, idx, field->getFieldIndex(), DbgInfo);
+      base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo);
 }
 
 static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
@@ -4048,7 +4065,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
           getContext().getRecordType(rec), rec->getLocation());
       addr = Address(
           Builder.CreatePreserveUnionAccessIndex(
-              addr.getPointer(), field->getFieldIndex(), DbgInfo),
+              addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo),
           addr.getAlignment());
     }
   } else {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index a51a9711ff170..0d534af42cddb 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2652,6 +2652,9 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Converts Location to a DebugLoc, if debug information is enabled.
   llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location);
 
+  /// Get the record field index as represented in debug info.
+  unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
+
 
   //===--------------------------------------------------------------------===//
   //                            Declaration Emission
diff --git a/clang/test/CodeGen/builtin-preserve-access-index.c b/clang/test/CodeGen/builtin-preserve-access-index.c
new file mode 100644
index 0000000000000..c56c6ecc0e566
--- /dev/null
+++ b/clang/test/CodeGen/builtin-preserve-access-index.c
@@ -0,0 +1,177 @@
+// RUN: %clang -target x86_64 -emit-llvm -S -g %s -o - | FileCheck %s
+
+#define _(x) (__builtin_preserve_access_index(x))
+
+const void *unit1(const void *arg) {
+  return _(arg);
+}
+// CHECK: define dso_local i8* @unit1(i8* %arg)
+// CHECK-NOT: llvm.preserve.array.access.index
+// CHECK-NOT: llvm.preserve.struct.access.index
+// CHECK-NOT: llvm.preserve.union.access.index
+
+const void *unit2(void) {
+  return _((const void *)0xffffffffFFFF0000ULL);
+}
+// CHECK: define dso_local i8* @unit2()
+// CHECK-NOT: llvm.preserve.array.access.index
+// CHECK-NOT: llvm.preserve.struct.access.index
+// CHECK-NOT: llvm.preserve.union.access.index
+
+const void *unit3(const int *arg) {
+  return _(arg + 1);
+}
+// CHECK: define dso_local i8* @unit3(i32* %arg)
+// CHECK-NOT: llvm.preserve.array.access.index
+// CHECK-NOT: llvm.preserve.struct.access.index
+// CHECK-NOT: llvm.preserve.union.access.index
+
+const void *unit4(const int *arg) {
+  return _(&arg[1]);
+}
+// CHECK: define dso_local i8* @unit4(i32* %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %0, i32 0, i32 1)
+
+const void *unit5(const int *arg[5]) {
+  return _(&arg[1][2]);
+}
+// CHECK: define dso_local i8* @unit5(i32** %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call i32** @llvm.preserve.array.access.index.p0p0i32.p0p0i32(i32** %0, i32 0, i32 1)
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %2, i32 0, i32 2)
+
+struct s1 {
+  char a;
+  int b;
+};
+
+struct s2 {
+  char a1:1;
+  char a2:1;
+  int b;
+};
+
+struct s3 {
+  char a1:1;
+  char a2:1;
+  char :6;
+  int b;
+};
+
+const void *unit6(struct s1 *arg) {
+  return _(&arg->a);
+}
+// CHECK: define dso_local i8* @unit6(%struct.s1* %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call i8* @llvm.preserve.struct.access.index.p0i8.p0s_struct.s1s(%struct.s1* %0, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1:[0-9]+]]
+
+const void *unit7(struct s1 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit7(%struct.s1* %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s1s(%struct.s1* %0, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1]]
+
+const void *unit8(struct s2 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit8(%struct.s2* %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s2s(%struct.s2* %0, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S2:[0-9]+]]
+
+const void *unit9(struct s3 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit9(%struct.s3* %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s3s(%struct.s3* %0, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S3:[0-9]+]]
+
+union u1 {
+  char a;
+  int b;
+};
+
+union u2 {
+  char a;
+  int :32;
+  int b;
+};
+
+const void *unit10(union u1 *arg) {
+  return _(&arg->a);
+}
+// CHECK: define dso_local i8* @unit10(%union.u1* %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1:[0-9]+]]
+
+const void *unit11(union u1 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit11(%union.u1* %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %0, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1]]
+
+const void *unit12(union u2 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit12(%union.u2* %arg)
+// CHECK-NOT: getelementptr
+// CHECK: call %union.u2* @llvm.preserve.union.access.index.p0s_union.u2s.p0s_union.u2s(%union.u2* %0, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U2:[0-9]+]]
+
+struct s4 {
+  char d;
+  union u {
+    int b[4];
+    char a;
+  } c;
+};
+
+union u3 {
+  struct s {
+    int b[4];
+  } c;
+  char a;
+};
+
+const void *unit13(struct s4 *arg) {
+  return _(&arg->c.b[2]);
+}
+// CHECK: define dso_local i8* @unit13(%struct.s4* %arg)
+// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %0, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4:[0-9]+]]
+// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %1, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U:[0-9]+]]
+// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %b, i32 1, i32 2)
+
+const void *unit14(union u3 *arg) {
+  return _(&arg->c.b[2]);
+}
+// CHECK: define dso_local i8* @unit14(%union.u3* %arg)
+// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3:[0-9]+]]
+// CHECK: call [4 x i32]* @llvm.preserve.struct.access.index.p0a4i32.p0s_struct.ss(%struct.s* %c, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_I_S:[0-9]+]]
+// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %2, i32 1, i32 2)
+
+const void *unit15(struct s4 *arg) {
+  return _(&arg[2].c.a);
+}
+// CHECK: define dso_local i8* @unit15(%struct.s4* %arg)
+// CHECK: call %struct.s4* @llvm.preserve.array.access.index.p0s_struct.s4s.p0s_struct.s4s(%struct.s4* %0, i32 0, i32 2)
+// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %1, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4]]
+// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %2, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U]]
+
+const void *unit16(union u3 *arg) {
+  return _(&arg[2].a);
+}
+// CHECK: define dso_local i8* @unit16(%union.u3* %arg)
+// CHECK: call %union.u3* @llvm.preserve.array.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %0, i32 0, i32 2)
+// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3]]
+
+// CHECK: ![[STRUCT_S1]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s1",
+// CHECK: ![[STRUCT_S2]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s2",
+// CHECK: ![[STRUCT_S3]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s3",
+// CHECK: ![[UNION_U1]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u1",
+// CHECK: ![[UNION_U2]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u2",
+// CHECK: ![[STRUCT_S4]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s4",
+// CHECK: ![[UNION_I_U]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u",
+// CHECK: ![[UNION_U3]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u3",
+// CHECK: ![[STRUCT_I_S]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s",
diff --git a/clang/test/Sema/builtin-preserve-access-index.c b/clang/test/Sema/builtin-preserve-access-index.c
new file mode 100644
index 0000000000000..c10ceb5145b8c
--- /dev/null
+++ b/clang/test/Sema/builtin-preserve-access-index.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -x c -triple x86_64-pc-linux-gnu -dwarf-version=4 -fsyntax-only -verify %s
+
+const void *invalid1(const int *arg) {
+  return __builtin_preserve_access_index(&arg[1], 1); // expected-error {{too many arguments to function call, expected 1, have 2}}
+}
+
+void *invalid2(const int *arg) {
+  return __builtin_preserve_access_index(&arg[1]); // expected-warning {{returning 'const void *' from a function with result type 'void *' discards qualifiers}}
+}
+
+const void *invalid3(const int *arg) {
+  return __builtin_preserve_access_index(1); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const void *'}}
+}

From 838c8e30c2f64e5398568b6369baa17aa38942fa Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 15 Jul 2019 15:43:04 +0000
Subject: [PATCH 113/451] [X86][SSE] Add PACKSS with zero shuffle masks.

This is an example of expansion due to D61129 - it should combine back to a PACKSS with a zero operand.

llvm-svn: 366077
---
 llvm/test/CodeGen/X86/packss.ll | 61 +++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll
index 2e22501a98672..5ac486281e760 100644
--- a/llvm/test/CodeGen/X86/packss.ll
+++ b/llvm/test/CodeGen/X86/packss.ll
@@ -264,3 +264,64 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
   %5 = trunc <8 x i32> %4 to <8 x i16>
   ret <8 x i16> %5
 }
+
+define <16 x i8> @packsswb_icmp_128_zero(<8 x i16> %a0) {
+; SSE-LABEL: packsswb_128_zero:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pxor %xmm1, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
+; SSE-NEXT:    packsswb %xmm0, %xmm1
+; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
+; SSE-NEXT:    ret{{[l|q]}}
+;
+; AVX-LABEL: packsswb_128_zero:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    ret{{[l|q]}}
+  %1 = icmp eq <8 x i16> %a0, zeroinitializer
+  %2 = sext <8 x i1> %1 to <8 x i8>
+  %3 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %3
+}
+
+define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
+; SSE-LABEL: packsswb_zero_256:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pxor %xmm2, %xmm2
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3]
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    movaps %xmm2, %xmm1
+; SSE-NEXT:    ret{{[l|q]}}
+;
+; AVX1-LABEL: packsswb_zero_256:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX1-NEXT:    ret{{[l|q]}}
+;
+; AVX2-LABEL: packsswb_zero_256:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    ret{{[l|q]}}
+  %1 = icmp eq <16 x i16> %a0, zeroinitializer
+  %2 = sext <16 x i1> %1 to <16 x i8>
+  %3 = shufflevector <16 x i8> zeroinitializer, <16 x i8> %2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %3
+}

From 5a07a614c0f13027ecf4a963f9ffecc47c0bf3c1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 15 Jul 2019 16:23:42 +0000
Subject: [PATCH 114/451] [X86][SSE] Regenerated packss.ll test file.

Not sure what went wrong in rL366077....

llvm-svn: 366079
---
 llvm/test/CodeGen/X86/packss.ll | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/X86/packss.ll b/llvm/test/CodeGen/X86/packss.ll
index 5ac486281e760..7c4e1d7ec1621 100644
--- a/llvm/test/CodeGen/X86/packss.ll
+++ b/llvm/test/CodeGen/X86/packss.ll
@@ -266,7 +266,7 @@ define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
 }
 
 define <16 x i8> @packsswb_icmp_128_zero(<8 x i16> %a0) {
-; SSE-LABEL: packsswb_128_zero:
+; SSE-LABEL: packsswb_icmp_128_zero:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
@@ -274,7 +274,7 @@ define <16 x i8> @packsswb_icmp_128_zero(<8 x i16> %a0) {
 ; SSE-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; SSE-NEXT:    ret{{[l|q]}}
 ;
-; AVX-LABEL: packsswb_128_zero:
+; AVX-LABEL: packsswb_icmp_128_zero:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -287,7 +287,7 @@ define <16 x i8> @packsswb_icmp_128_zero(<8 x i16> %a0) {
 }
 
 define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
-; SSE-LABEL: packsswb_zero_256:
+; SSE-LABEL: packsswb_icmp_zero_256:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
@@ -298,7 +298,7 @@ define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
 ; SSE-NEXT:    movaps %xmm2, %xmm1
 ; SSE-NEXT:    ret{{[l|q]}}
 ;
-; AVX1-LABEL: packsswb_zero_256:
+; AVX1-LABEL: packsswb_icmp_zero_256:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -311,7 +311,7 @@ define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
 ; AVX1-NEXT:    ret{{[l|q]}}
 ;
-; AVX2-LABEL: packsswb_zero_256:
+; AVX2-LABEL: packsswb_icmp_zero_256:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0

From 73e33368090228c4fc404f9d8db1c89cafe45f98 Mon Sep 17 00:00:00 2001
From: James Henderson <jh7370@my.bristol.ac.uk>
Date: Mon, 15 Jul 2019 16:40:34 +0000
Subject: [PATCH 115/451] [docs][llvm-nm] Fix inconsistent grammar

llvm-svn: 366080
---
 llvm/docs/CommandGuide/llvm-nm.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/CommandGuide/llvm-nm.rst b/llvm/docs/CommandGuide/llvm-nm.rst
index 3b7ea93a0d267..aa58a3f3884ca 100644
--- a/llvm/docs/CommandGuide/llvm-nm.rst
+++ b/llvm/docs/CommandGuide/llvm-nm.rst
@@ -177,7 +177,7 @@ OPTIONS
 
 .. option:: --no-sort, -p
 
- Shows symbols in the order encountered.
+ Show symbols in the order encountered.
 
 .. option:: --no-weak, -W
 

From 7284d443c3ba20f3d2f58c9034a123acd4c89d96 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Mon, 15 Jul 2019 16:43:36 +0000
Subject: [PATCH 116/451] Revert "r366069: [PatternMatch] Implement matching
 code for LibFunc"

Reason: the change introduced a layering violation by adding a
dependency on IR to Analysis.

llvm-svn: 366081
---
 llvm/include/llvm/IR/PatternMatch.h | 52 -----------------------------
 llvm/unittests/IR/PatternMatch.cpp  | 38 +--------------------
 2 files changed, 1 insertion(+), 89 deletions(-)

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index b45a6b0bc8b8f..0f03d7cc56b88 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -30,7 +30,6 @@
 
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/InstrTypes.h"
@@ -1813,57 +1812,6 @@ template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
   return Signum_match<Val_t>(V);
 }
 
-/// \brief LibFunc matchers.
-struct LibFunc_match {
-  LibFunc F;
-  TargetLibraryInfo TLI;
-  
-  LibFunc_match(LibFunc Func, TargetLibraryInfo TargetLI)
-  : F(Func), TLI(TargetLI) {}
-  
-  template <typename OpTy> bool match(OpTy *V) {
-    LibFunc LF;
-    if (const auto *CI = dyn_cast<CallInst>(V))
-      if (!CI->isNoBuiltin() && CI->getCalledFunction() &&
-          TLI.getLibFunc(*CI->getCalledFunction(), LF) &&
-          LF == F && TLI.has(LF))
-        return true;
-    return false;
-  }
-};
-
-/// LibFunc matches are combinations of Name matchers, and argument
-/// matchers.
-template <typename T0 = void, typename T1 = void, typename T2 = void>
-struct m_LibFunc_Ty;
-template <typename T0> struct m_LibFunc_Ty<T0> {
-  using Ty = match_combine_and<LibFunc_match, Argument_match<T0>>;
-};
-template <typename T0, typename T1> struct m_LibFunc_Ty<T0, T1> {
-  using Ty =
-  match_combine_and<typename m_LibFunc_Ty<T0>::Ty,
-  Argument_match<T1>>;
-};
-
-/// \brief Match LibFunc calls like this:
-/// m_LibFunc<LibFunc_tan>(m_Value(X))
-template <LibFunc F>
-inline LibFunc_match m_LibFunc(TargetLibraryInfo TLI) {
-  return LibFunc_match(F, TLI);
-}
-
-template <LibFunc F, typename T0>
-inline typename m_LibFunc_Ty<T0>::Ty
-m_LibFunc(const TargetLibraryInfo TLI, const T0 &Op0) {
-  return m_CombineAnd(m_LibFunc<F>(TLI), m_Argument<0>(Op0));
-}
-
-template <LibFunc F, typename T0, typename T1>
-inline typename m_LibFunc_Ty<T0, T1>::Ty
-m_LibFunc(const TargetLibraryInfo TLI, const T0 &Op0, const T1 &Op1) {
-  return m_CombineAnd(m_LibFunc<F>(TLI, Op0), m_Argument<1>(Op1));
-}
-
 } // end namespace PatternMatch
 } // end namespace llvm
 
diff --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp
index 3f8e691801951..600494fba2671 100644
--- a/llvm/unittests/IR/PatternMatch.cpp
+++ b/llvm/unittests/IR/PatternMatch.cpp
@@ -35,15 +35,13 @@ struct PatternMatchTest : ::testing::Test {
   Function *F;
   BasicBlock *BB;
   IRBuilder<NoFolder> IRB;
-  TargetLibraryInfoImpl TLII;
-  TargetLibraryInfo TLI;
 
   PatternMatchTest()
       : M(new Module("PatternMatchTestModule", Ctx)),
         F(Function::Create(
             FunctionType::get(Type::getVoidTy(Ctx), /* IsVarArg */ false),
             Function::ExternalLinkage, "f", M.get())),
-        BB(BasicBlock::Create(Ctx, "entry", F)), IRB(BB), TLI(TLII) {}
+        BB(BasicBlock::Create(Ctx, "entry", F)), IRB(BB) {}
 };
 
 TEST_F(PatternMatchTest, OneUse) {
@@ -1010,40 +1008,6 @@ TEST_F(PatternMatchTest, FloatingPointFNeg) {
   EXPECT_FALSE(match(V3, m_FNeg(m_Value(Match))));
 }
 
-TEST_F(PatternMatchTest, LibFunc) {
-  Type *FltTy = IRB.getFloatTy();
-  Value *One = ConstantFP::get(FltTy, 1.0);
-  Value *Two = ConstantFP::get(FltTy, 2.0);
-  Value *MatchOne, *MatchTwo;
-
-  StringRef TanName = TLI.getName(LibFunc_tan);
-  FunctionCallee TanCallee = M->getOrInsertFunction(TanName, FltTy, FltTy);
-  CallInst *Tan = IRB.CreateCall(TanCallee, One, TanName);
-
-  StringRef PowName = TLI.getName(LibFunc_pow);
-  FunctionCallee PowCallee = M->getOrInsertFunction(PowName, FltTy, FltTy, FltTy);
-  CallInst *Pow = IRB.CreateCall(PowCallee, {One, Two}, PowName);
-
-  EXPECT_TRUE(match(Tan, m_LibFunc<LibFunc_tan>(TLI)));
-  EXPECT_FALSE(match(Tan, m_LibFunc<LibFunc_pow>(TLI)));
-  EXPECT_FALSE(match(Pow, m_LibFunc<LibFunc_tan>(TLI)));
-
-  EXPECT_TRUE(match(Tan, m_LibFunc<LibFunc_tan>(TLI, m_Value(MatchOne))));
-  EXPECT_EQ(One, MatchOne);
-  EXPECT_FALSE(match(Tan, m_LibFunc<LibFunc_sin>(TLI, m_Value())));
-
-  EXPECT_TRUE(match(Pow, m_LibFunc<LibFunc_pow>(TLI, m_Value(MatchOne),
-                                                m_Value(MatchTwo))));
-  EXPECT_EQ(One, MatchOne);
-  EXPECT_EQ(Two, MatchTwo);
-  EXPECT_FALSE(match(Pow, m_LibFunc<LibFunc_fminf>(TLI, m_Value(), m_Value())));
-  
-  TLII.disableAllFunctions();
-  EXPECT_FALSE(match(Tan, m_LibFunc<LibFunc_tan>(TLI)));
-  EXPECT_FALSE(match(Tan, m_LibFunc<LibFunc_tan>(TLI, m_Value())));
-  EXPECT_FALSE(match(Pow, m_LibFunc<LibFunc_pow>(TLI, m_Value(), m_Value())));
-}
-
 template <typename T> struct MutableConstTest : PatternMatchTest { };
 
 typedef ::testing::Types<std::tuple<Value*, Instruction*>,

From a53e779edc859f6b9d56e58af29a975ecbbfbf3a Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 15 Jul 2019 16:59:38 +0000
Subject: [PATCH 117/451] [x86] add tests for reductions that might be better
 with more horizontal ops; NFC

llvm-svn: 366082
---
 llvm/test/CodeGen/X86/haddsub.ll          | 204 ++++++++++++
 llvm/test/CodeGen/X86/phaddsub-extract.ll | 363 ++++++++++++++++++++++
 2 files changed, 567 insertions(+)

diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/CodeGen/X86/haddsub.ll
index 902fddaad305b..a3490658d4095 100644
--- a/llvm/test/CodeGen/X86/haddsub.ll
+++ b/llvm/test/CodeGen/X86/haddsub.ll
@@ -1781,3 +1781,207 @@ define float @PR39936_v8f32(<8 x float>) {
   %10 = extractelement <8 x float> %9, i32 0
   ret float %10
 }
+
+define float @hadd32_4(<4 x float> %x225) {
+; SSE3-SLOW-LABEL: hadd32_4:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: hadd32_4:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: hadd32_4:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: hadd32_4:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    retq
+  %x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %x227 = fadd <4 x float> %x225, %x226
+  %x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %x229 = fadd <4 x float> %x227, %x228
+  %x230 = extractelement <4 x float> %x229, i32 0
+  ret float %x230
+}
+
+define float @hadd32_8(<8 x float> %x225) {
+; SSE3-SLOW-LABEL: hadd32_8:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: hadd32_8:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: hadd32_8:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: hadd32_8:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = fadd <8 x float> %x225, %x226
+  %x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = fadd <8 x float> %x227, %x228
+  %x230 = extractelement <8 x float> %x229, i32 0
+  ret float %x230
+}
+
+define float @hadd32_16(<16 x float> %x225) {
+; SSE3-SLOW-LABEL: hadd32_16:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: hadd32_16:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: hadd32_16:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: hadd32_16:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = fadd <16 x float> %x225, %x226
+  %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = fadd <16 x float> %x227, %x228
+  %x230 = extractelement <16 x float> %x229, i32 0
+  ret float %x230
+}
+
+define float @hadd32_4_optsize(<4 x float> %x225) optsize {
+; SSE3-LABEL: hadd32_4_optsize:
+; SSE3:       # %bb.0:
+; SSE3-NEXT:    movaps %xmm0, %xmm1
+; SSE3-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-NEXT:    addps %xmm0, %xmm1
+; SSE3-NEXT:    haddps %xmm1, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; AVX-LABEL: hadd32_4_optsize:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %x226 = shufflevector <4 x float> %x225, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %x227 = fadd <4 x float> %x225, %x226
+  %x228 = shufflevector <4 x float> %x227, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %x229 = fadd <4 x float> %x227, %x228
+  %x230 = extractelement <4 x float> %x229, i32 0
+  ret float %x230
+}
+
+define float @hadd32_8_optsize(<8 x float> %x225) optsize {
+; SSE3-LABEL: hadd32_8_optsize:
+; SSE3:       # %bb.0:
+; SSE3-NEXT:    movaps %xmm0, %xmm1
+; SSE3-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-NEXT:    addps %xmm0, %xmm1
+; SSE3-NEXT:    haddps %xmm1, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; AVX-LABEL: hadd32_8_optsize:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %x226 = shufflevector <8 x float> %x225, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = fadd <8 x float> %x225, %x226
+  %x228 = shufflevector <8 x float> %x227, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = fadd <8 x float> %x227, %x228
+  %x230 = extractelement <8 x float> %x229, i32 0
+  ret float %x230
+}
+
+define float @hadd32_16_optsize(<16 x float> %x225) optsize {
+; SSE3-LABEL: hadd32_16_optsize:
+; SSE3:       # %bb.0:
+; SSE3-NEXT:    movaps %xmm0, %xmm1
+; SSE3-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-NEXT:    addps %xmm0, %xmm1
+; SSE3-NEXT:    haddps %xmm1, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; AVX-LABEL: hadd32_16_optsize:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = fadd <16 x float> %x225, %x226
+  %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = fadd <16 x float> %x227, %x228
+  %x230 = extractelement <16 x float> %x229, i32 0
+  ret float %x230
+}
diff --git a/llvm/test/CodeGen/X86/phaddsub-extract.ll b/llvm/test/CodeGen/X86/phaddsub-extract.ll
index 7851bcd812204..e81952d331c25 100644
--- a/llvm/test/CodeGen/X86/phaddsub-extract.ll
+++ b/llvm/test/CodeGen/X86/phaddsub-extract.ll
@@ -1885,3 +1885,366 @@ define i32 @partial_reduction_sub_v16i32(<16 x i32> %x) {
   ret i32 %r
 }
 
+; PR42023 - https://bugs.llvm.org/show_bug.cgi?id=42023
+
+define i16 @hadd16_8(<8 x i16> %x223) {
+; SSE3-SLOW-LABEL: hadd16_8:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-SLOW-NEXT:    paddw %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE3-SLOW-NEXT:    paddw %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    movdqa %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    psrld $16, %xmm1
+; SSE3-SLOW-NEXT:    paddw %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movd %xmm1, %eax
+; SSE3-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: hadd16_8:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-FAST-NEXT:    paddw %xmm0, %xmm1
+; SSE3-FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE3-FAST-NEXT:    paddw %xmm1, %xmm0
+; SSE3-FAST-NEXT:    phaddw %xmm0, %xmm0
+; SSE3-FAST-NEXT:    movd %xmm0, %eax
+; SSE3-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: hadd16_8:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-SLOW-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-SLOW-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vpsrld $16, %xmm0, %xmm1
+; AVX-SLOW-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovd %xmm0, %eax
+; AVX-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: hadd16_8:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX-FAST-NEXT:    retq
+  %x224 = shufflevector <8 x i16> %x223, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x225 = add <8 x i16> %x223, %x224
+  %x226 = shufflevector <8 x i16> %x225, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = add <8 x i16> %x225, %x226
+  %x228 = shufflevector <8 x i16> %x227, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = add <8 x i16> %x227, %x228
+  %x230 = extractelement <8 x i16> %x229, i32 0
+  ret i16 %x230
+}
+
+define i32 @hadd32_4(<4 x i32> %x225) {
+; SSE3-SLOW-LABEL: hadd32_4:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-SLOW-NEXT:    paddd %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE3-SLOW-NEXT:    paddd %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    movd %xmm0, %eax
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: hadd32_4:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-FAST-NEXT:    paddd %xmm0, %xmm1
+; SSE3-FAST-NEXT:    phaddd %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movd %xmm1, %eax
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: hadd32_4:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovd %xmm0, %eax
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: hadd32_4:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX-FAST-NEXT:    retq
+  %x226 = shufflevector <4 x i32> %x225, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %x227 = add <4 x i32> %x225, %x226
+  %x228 = shufflevector <4 x i32> %x227, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %x229 = add <4 x i32> %x227, %x228
+  %x230 = extractelement <4 x i32> %x229, i32 0
+  ret i32 %x230
+}
+
+define i32 @hadd32_8(<8 x i32> %x225) {
+; SSE3-SLOW-LABEL: hadd32_8:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-SLOW-NEXT:    paddd %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE3-SLOW-NEXT:    paddd %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    movd %xmm0, %eax
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: hadd32_8:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-FAST-NEXT:    paddd %xmm0, %xmm1
+; SSE3-FAST-NEXT:    phaddd %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movd %xmm1, %eax
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: hadd32_8:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovd %xmm0, %eax
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: hadd32_8:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x226 = shufflevector <8 x i32> %x225, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = add <8 x i32> %x225, %x226
+  %x228 = shufflevector <8 x i32> %x227, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = add <8 x i32> %x227, %x228
+  %x230 = extractelement <8 x i32> %x229, i32 0
+  ret i32 %x230
+}
+
+define i32 @hadd32_16(<16 x i32> %x225) {
+; SSE3-SLOW-LABEL: hadd32_16:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-SLOW-NEXT:    paddd %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE3-SLOW-NEXT:    paddd %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    movd %xmm0, %eax
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: hadd32_16:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-FAST-NEXT:    paddd %xmm0, %xmm1
+; SSE3-FAST-NEXT:    phaddd %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movd %xmm1, %eax
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: hadd32_16:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovd %xmm0, %eax
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX1-FAST-LABEL: hadd32_16:
+; AVX1-FAST:       # %bb.0:
+; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX1-FAST-NEXT:    vzeroupper
+; AVX1-FAST-NEXT:    retq
+;
+; AVX2-FAST-LABEL: hadd32_16:
+; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX2-FAST-NEXT:    vzeroupper
+; AVX2-FAST-NEXT:    retq
+;
+; AVX512-FAST-LABEL: hadd32_16:
+; AVX512-FAST:       # %bb.0:
+; AVX512-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX512-FAST-NEXT:    vzeroupper
+; AVX512-FAST-NEXT:    retq
+  %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = add <16 x i32> %x225, %x226
+  %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = add <16 x i32> %x227, %x228
+  %x230 = extractelement <16 x i32> %x229, i32 0
+  ret i32 %x230
+}
+
+define i16 @hadd16_8_optsize(<8 x i16> %x223) optsize {
+; SSE3-LABEL: hadd16_8_optsize:
+; SSE3:       # %bb.0:
+; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-NEXT:    paddw %xmm0, %xmm1
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE3-NEXT:    paddw %xmm1, %xmm0
+; SSE3-NEXT:    phaddw %xmm0, %xmm0
+; SSE3-NEXT:    movd %xmm0, %eax
+; SSE3-NEXT:    # kill: def $ax killed $ax killed $eax
+; SSE3-NEXT:    retq
+;
+; AVX-LABEL: hadd16_8_optsize:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
+; AVX-NEXT:    retq
+  %x224 = shufflevector <8 x i16> %x223, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x225 = add <8 x i16> %x223, %x224
+  %x226 = shufflevector <8 x i16> %x225, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = add <8 x i16> %x225, %x226
+  %x228 = shufflevector <8 x i16> %x227, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = add <8 x i16> %x227, %x228
+  %x230 = extractelement <8 x i16> %x229, i32 0
+  ret i16 %x230
+}
+
+define i32 @hadd32_4_optsize(<4 x i32> %x225) optsize {
+; SSE3-LABEL: hadd32_4_optsize:
+; SSE3:       # %bb.0:
+; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-NEXT:    paddd %xmm0, %xmm1
+; SSE3-NEXT:    phaddd %xmm1, %xmm1
+; SSE3-NEXT:    movd %xmm1, %eax
+; SSE3-NEXT:    retq
+;
+; AVX-LABEL: hadd32_4_optsize:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    retq
+  %x226 = shufflevector <4 x i32> %x225, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %x227 = add <4 x i32> %x225, %x226
+  %x228 = shufflevector <4 x i32> %x227, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %x229 = add <4 x i32> %x227, %x228
+  %x230 = extractelement <4 x i32> %x229, i32 0
+  ret i32 %x230
+}
+
+define i32 @hadd32_8_optsize(<8 x i32> %x225) optsize {
+; SSE3-LABEL: hadd32_8_optsize:
+; SSE3:       # %bb.0:
+; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-NEXT:    paddd %xmm0, %xmm1
+; SSE3-NEXT:    phaddd %xmm1, %xmm1
+; SSE3-NEXT:    movd %xmm1, %eax
+; SSE3-NEXT:    retq
+;
+; AVX-LABEL: hadd32_8_optsize:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %x226 = shufflevector <8 x i32> %x225, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = add <8 x i32> %x225, %x226
+  %x228 = shufflevector <8 x i32> %x227, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = add <8 x i32> %x227, %x228
+  %x230 = extractelement <8 x i32> %x229, i32 0
+  ret i32 %x230
+}
+
+define i32 @hadd32_16_optsize(<16 x i32> %x225) optsize {
+; SSE3-LABEL: hadd32_16_optsize:
+; SSE3:       # %bb.0:
+; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE3-NEXT:    paddd %xmm0, %xmm1
+; SSE3-NEXT:    phaddd %xmm1, %xmm1
+; SSE3-NEXT:    movd %xmm1, %eax
+; SSE3-NEXT:    retq
+;
+; AVX1-SLOW-LABEL: hadd32_16_optsize:
+; AVX1-SLOW:       # %bb.0:
+; AVX1-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-SLOW-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX1-SLOW-NEXT:    vmovd %xmm0, %eax
+; AVX1-SLOW-NEXT:    vzeroupper
+; AVX1-SLOW-NEXT:    retq
+;
+; AVX1-FAST-LABEL: hadd32_16_optsize:
+; AVX1-FAST:       # %bb.0:
+; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX1-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX1-FAST-NEXT:    vzeroupper
+; AVX1-FAST-NEXT:    retq
+;
+; AVX2-SLOW-LABEL: hadd32_16_optsize:
+; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT:    vmovd %xmm0, %eax
+; AVX2-SLOW-NEXT:    vzeroupper
+; AVX2-SLOW-NEXT:    retq
+;
+; AVX2-FAST-LABEL: hadd32_16_optsize:
+; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX2-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX2-FAST-NEXT:    vzeroupper
+; AVX2-FAST-NEXT:    retq
+;
+; AVX512-SLOW-LABEL: hadd32_16_optsize:
+; AVX512-SLOW:       # %bb.0:
+; AVX512-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512-SLOW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-SLOW-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512-SLOW-NEXT:    vmovd %xmm0, %eax
+; AVX512-SLOW-NEXT:    vzeroupper
+; AVX512-SLOW-NEXT:    retq
+;
+; AVX512-FAST-LABEL: hadd32_16_optsize:
+; AVX512-FAST:       # %bb.0:
+; AVX512-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX512-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX512-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX512-FAST-NEXT:    vzeroupper
+; AVX512-FAST-NEXT:    retq
+  %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x227 = add <16 x i32> %x225, %x226
+  %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x229 = add <16 x i32> %x227, %x228
+  %x230 = extractelement <16 x i32> %x229, i32 0
+  ret i32 %x230
+}

From 3329721e6442b42a29c54003c749bf06af7a0afc Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Mon, 15 Jul 2019 17:10:44 +0000
Subject: [PATCH 118/451] [lldb][doc] Document how our LLDB table gen
 initialized options

Summary: This patch adds documentation that should make it easier to migrate from using the old initializers to the table gen format.

Reviewers: jingham

Reviewed By: jingham

Subscribers: abidh, lldb-commits, JDevlieghere

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D64670

llvm-svn: 366083
---
 lldb/source/Commands/OptionsBase.td | 98 +++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/lldb/source/Commands/OptionsBase.td b/lldb/source/Commands/OptionsBase.td
index c4a326fbaf5f7..a81563ed28c2b 100644
--- a/lldb/source/Commands/OptionsBase.td
+++ b/lldb/source/Commands/OptionsBase.td
@@ -1,3 +1,101 @@
+
+// The sections below describe how the fields of the `OptionDefinition` struct
+// are initialized by the different definitions in Options.td and this file.
+////////////////////////////////////////////////////////////////////////////////
+// Field: usage_mask
+// Default value: LLDB_OPT_SET_ALL (Option allowed in all groups)
+// Set by:
+//  - `Group`: Sets a single group to this option.
+//             Example: def foo : Option<"foo", "f">, Group<1>;
+//  - `Groups`: Sets a given list of group numbers.
+//              Example: def foo : Option<"foo", "f">, Groups<[1,4,6]>;
+//  - `GroupRange`: Sets an interval of groups. Start and end are inclusive.
+//                  Example: def foo : Option<"foo", "f">, GroupRange<1, 4>;
+//                           Sets group 1, 2, 3, 4 for the option.
+////////////////////////////////////////////////////////////////////////////////
+// Field: required
+// Default value: false (Not required)
+// Set by:
+//   - `Required`: Marks the option as required.
+//              Example: def foo : Option<"foo", "f">, Required;
+////////////////////////////////////////////////////////////////////////////////
+// Field: long_option
+// Default value: not available (has to be defined in Option)
+// Set by:
+//   - `Option` constructor: Already set by constructor.
+//                           Example: def foo : Option<"long-option", "l">
+//                                                           ^
+//                                                    long option value
+////////////////////////////////////////////////////////////////////////////////
+// Field: short_option
+// Default value: not available (has to be defined in Option)
+// Set by:
+//   - `Option` constructor: Already set by constructor.
+//                           Example: def foo : Option<"long-option", "l">
+//                                                                     ^
+//                                                                short option
+////////////////////////////////////////////////////////////////////////////////
+// Field: option_has_arg
+// Default value: OptionParser::eNoArgument (No argument allowed)
+// Set by:
+//  - `OptionalArg`: Sets the argument type and marks it as optional.
+//  - `Arg`: Sets the argument type and marks it as required.
+//  - `EnumArg`: Sets the argument type to an enum and marks it as required.
+// See argument_type field for more info.
+////////////////////////////////////////////////////////////////////////////////
+// Field: validator
+// Default value: 0 (No validator for option)
+// Set by: Nothing. This is currently only set after initialization in LLDB.
+////////////////////////////////////////////////////////////////////////////////
+// Field: enum_values
+// Default value: {} (No enum associated with this option)
+// Set by:
+//  - `EnumArg`: Sets the argument type and assigns it an enum holding the valid
+//               values. The enum needs to be a variable in the including code.
+//               Marks the argument as required (see option_has_arg).
+//               Example: def foo : Option<"foo", "f">,
+//                          EnumArg<"SortOrder",
+//                          "OptionEnumValues(g_sort_option_enumeration)">;
+////////////////////////////////////////////////////////////////////////////////
+// Field: completion_type
+// Default value: CommandCompletions::eNoCompletion (no tab completion)
+// Set by:
+//  - `Completion`: Gives the option a single completion kind.
+//                  Example: def foo : Option<"foo", "f">,
+//                             Completion<"DiskFile">;
+//                           Sets the completion to eDiskFileCompletion
+//
+//  - `Completions`: Sets a given list of completion kinds.
+//                   Example: def foo : Option<"foo", "f">,
+//                              Completions<["DiskFile", "DiskDirectory"]>;
+//                            Sets the completion to
+//                            `eDiskFileCompletion | eDiskDirectoryCompletion`.
+////////////////////////////////////////////////////////////////////////////////
+// Field: argument_type
+// Default value: eArgTypeNone
+// Set by:
+//  - `OptionalArg`: Sets the argument type and marks it as optional.
+//                   Example: def foo : Option<"foo", "f">, OptionalArg<"Pid">;
+//                   Sets the argument type to eArgTypePid and marks the
+//                   argument as optional (see option_has_arg).
+//  - `Arg`: Sets the argument type and marks it as required.
+//           Example: def foo : Option<"foo", "f">, Arg<"Pid">;
+//                    Sets the argument type to eArgTypePid and marks the
+//                    argument as required (see option_has_arg).
+//  - `EnumArg`: Sets the argument type and assigns it an enum holding the valid
+//               values. The enum needs to be a variable in the including code.
+//               Marks the argument as required (see option_has_arg).
+//               Example: def foo : Option<"foo", "f">,
+//                          EnumArg<"SortOrder",
+//                          "OptionEnumValues(g_sort_option_enumeration)">;
+////////////////////////////////////////////////////////////////////////////////
+// Field: usage_text
+// Default value: ""
+// Set by:
+//  - `Desc`: Sets the description for the given option.
+//            Example: def foo : Option<"foo", "f">, Desc<"does nothing.">;
+//                     Sets the description to "does nothing.".
+
 // Base class for all options.
 class Option<string fullname, string shortname> {
   string FullName = fullname;

From 0e5f91575766e486aec86e464a523a66e2cbebd6 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 15 Jul 2019 17:12:08 +0000
Subject: [PATCH 119/451] Use unique_ptr instead of manual delete in one place.
 No behavior change.

llvm-svn: 366084
---
 clang/lib/Driver/Compilation.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 931dd19f04ae0..5f3026e6ce508 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -153,30 +153,28 @@ int Compilation::ExecuteCommand(const Command &C,
   if ((getDriver().CCPrintOptions ||
        getArgs().hasArg(options::OPT_v)) && !getDriver().CCGenDiagnostics) {
     raw_ostream *OS = &llvm::errs();
+    std::unique_ptr<llvm::raw_fd_ostream> OwnedStream;
 
     // Follow gcc implementation of CC_PRINT_OPTIONS; we could also cache the
     // output stream.
     if (getDriver().CCPrintOptions && getDriver().CCPrintOptionsFilename) {
       std::error_code EC;
-      OS = new llvm::raw_fd_ostream(getDriver().CCPrintOptionsFilename, EC,
-                                    llvm::sys::fs::F_Append |
-                                        llvm::sys::fs::F_Text);
+      OwnedStream.reset(new llvm::raw_fd_ostream(
+          getDriver().CCPrintOptionsFilename, EC,
+          llvm::sys::fs::F_Append | llvm::sys::fs::F_Text));
       if (EC) {
         getDriver().Diag(diag::err_drv_cc_print_options_failure)
             << EC.message();
         FailingCommand = &C;
-        delete OS;
         return 1;
       }
+      OS = OwnedStream.get();
     }
 
     if (getDriver().CCPrintOptions)
       *OS << "[Logging clang options]";
 
     C.Print(*OS, "\n", /*Quote=*/getDriver().CCPrintOptions);
-
-    if (OS != &llvm::errs())
-      delete OS;
   }
 
   std::string Error;

From 05489f095223db63c1607051edb7a5fc8c767a78 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 15 Jul 2019 17:20:34 +0000
Subject: [PATCH 120/451] Use a unique_ptr instead of manual memory management
 for CustomDiagInfo

llvm-svn: 366085
---
 clang/include/clang/Basic/DiagnosticIDs.h | 2 +-
 clang/lib/Basic/DiagnosticIDs.cpp         | 8 +++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index fccd534be350f..5b9391b5a4527 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -169,7 +169,7 @@ class DiagnosticIDs : public RefCountedBase<DiagnosticIDs> {
 
 private:
   /// Information for uniquing and looking up custom diags.
-  diag::CustomDiagInfo *CustomDiagInfo;
+  std::unique_ptr<diag::CustomDiagInfo> CustomDiagInfo;
 
 public:
   DiagnosticIDs();
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index f189e5de498a1..e30e3753d1936 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -311,11 +311,9 @@ namespace clang {
 // Common Diagnostic implementation
 //===----------------------------------------------------------------------===//
 
-DiagnosticIDs::DiagnosticIDs() { CustomDiagInfo = nullptr; }
+DiagnosticIDs::DiagnosticIDs() {}
 
-DiagnosticIDs::~DiagnosticIDs() {
-  delete CustomDiagInfo;
-}
+DiagnosticIDs::~DiagnosticIDs() {}
 
 /// getCustomDiagID - Return an ID for a diagnostic with the specified message
 /// and level.  If this is the first request for this diagnostic, it is
@@ -325,7 +323,7 @@ DiagnosticIDs::~DiagnosticIDs() {
 /// mapped to a unique DiagID.
 unsigned DiagnosticIDs::getCustomDiagID(Level L, StringRef FormatString) {
   if (!CustomDiagInfo)
-    CustomDiagInfo = new diag::CustomDiagInfo();
+    CustomDiagInfo.reset(new diag::CustomDiagInfo());
   return CustomDiagInfo->getOrCreateDiagID(L, FormatString, *this);
 }
 

From cc02b170823575049a13f22ccf3529a289625f89 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 17:20:40 +0000
Subject: [PATCH 121/451] AMDGPU/GlobalISel: RegBankSelect for G_CONCAT_VECTORS

llvm-svn: 366086
---
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |  3 +-
 .../regbankselect-concat-vector.mir           | 69 +++++++++++++++++++
 2 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 78cafc7f9978c..95aa32eff98a8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1653,7 +1653,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     break;
   }
   case AMDGPU::G_MERGE_VALUES:
-  case AMDGPU::G_BUILD_VECTOR: {
+  case AMDGPU::G_BUILD_VECTOR:
+  case AMDGPU::G_CONCAT_VECTORS: {
     unsigned Bank = isSALUMapping(MI) ?
       AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
     unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir
new file mode 100644
index 0000000000000..01a545b755176
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-concat-vector.mir
@@ -0,0 +1,69 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: concat_vectors_v4s16_ss
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; CHECK-LABEL: name: concat_vectors_v4s16_ss
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $sgpr1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+...
+
+---
+name: concat_vectors_v4s16_sv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; CHECK-LABEL: name: concat_vectors_v4s16_sv
+    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY1]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $sgpr0
+    %1:_(<2 x s16>) = COPY $vgpr0
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+...
+
+---
+name: concat_vectors_v4s16_vs
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+    ; CHECK-LABEL: name: concat_vectors_v4s16_vs
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY2]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $sgpr0
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+...
+
+---
+name: concat_vectors_v4s16_vv
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; CHECK-LABEL: name: concat_vectors_v4s16_vv
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(<2 x s16>) = COPY $vgpr1
+    %2:_(<4 x s16>) = G_CONCAT_VECTORS %0, %1
+...

From a65913e752e00f97df20aec6bbd3fea78fef973c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 17:26:43 +0000
Subject: [PATCH 122/451] AMDGPU/GlobalISel: Select easy cases for
 G_BUILD_VECTOR

llvm-svn: 366087
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |   4 +
 .../GlobalISel/inst-select-build-vector.mir   | 152 ++++++++++++++++++
 2 files changed, 156 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5b2dfc8ff50b0..6b827db6faa80 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -341,6 +341,9 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
 
   const unsigned SrcSize = SrcTy.getSizeInBits();
+  if (SrcSize < 32)
+    return false;
+
   const DebugLoc &DL = MI.getDebugLoc();
   const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
   const unsigned DstSize = DstTy.getSizeInBits();
@@ -1235,6 +1238,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_EXTRACT:
     return selectG_EXTRACT(I);
   case TargetOpcode::G_MERGE_VALUES:
+  case TargetOpcode::G_BUILD_VECTOR:
   case TargetOpcode::G_CONCAT_VECTORS:
     return selectG_MERGE_VALUES(I);
   case TargetOpcode::G_UNMERGE_VALUES:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir
new file mode 100644
index 0000000000000..d97118bf14864
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector.mir
@@ -0,0 +1,152 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs  -o - %s | FileCheck -check-prefix=GCN  %s
+
+---
+name: test_build_vector_v_v2s32_v_s32_v_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GCN-LABEL: name: test_build_vector_v_v2s32_v_s32_v_s32
+    ; GCN: liveins: $vgpr0, $vgpr1
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(<2 x s32>) = G_BUILD_VECTOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: test_build_vector_v_v2s32_s_s32_v_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GCN-LABEL: name: test_build_vector_v_v2s32_s_s32_v_s32
+    ; GCN: liveins: $sgpr0, $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(<2 x s32>) = G_BUILD_VECTOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: test_build_vector_v_v2s32_v_s32_s_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GCN-LABEL: name: test_build_vector_v_v2s32_v_s32_s_s32
+    ; GCN: liveins: $sgpr0, $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(<2 x s32>) = G_BUILD_VECTOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: test_build_vector_s_v2s32_s_s32_s_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; GCN-LABEL: name: test_build_vector_s_v2s32_s_s32_s_s32
+    ; GCN: liveins: $sgpr0, $sgpr1
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(<2 x s32>) = G_BUILD_VECTOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: test_build_vector_s_v2s32_undef_s_s32_s_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: test_build_vector_s_v2s32_undef_s_s32_s_s32
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE undef %2:sreg_32_xm0, %subreg.sub0, [[COPY]], %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:sgpr(<2 x s32>) = G_BUILD_VECTOR undef %0:sgpr(s32), %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: test_build_vector_s_v2s32_s_s32_undef_s_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: test_build_vector_s_v2s32_s_s32_undef_s_s32
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[COPY]], %subreg.sub0, undef %2:sreg_32_xm0, %subreg.sub1
+    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %2:sgpr(<2 x s32>) = G_BUILD_VECTOR %0, undef %1:sgpr(s32),
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: test_build_vector_s_v2s64_s_s64_s_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+    ; GCN-LABEL: name: test_build_vector_s_v2s64_s_s64_s_s64
+    ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3
+    ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[REG_SEQUENCE]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s64) = COPY $sgpr2_sgpr3
+    %4:sgpr(<2 x s64>) = G_BUILD_VECTOR %0, %1
+    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %4
+...

From a2dcbd3643993b67c0722f58610bed138626cd86 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 15 Jul 2019 17:27:46 +0000
Subject: [PATCH 123/451] Use a unique_ptr instead of manual memory management
 for LineTable

llvm-svn: 366088
---
 clang/include/clang/Basic/SourceManager.h | 2 +-
 clang/lib/Basic/SourceManager.cpp         | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 388fc1c1f8622..e32f749ae6abd 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -679,7 +679,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
   /// Holds information for \#line directives.
   ///
   /// This is referenced by indices from SLocEntryTable.
-  LineTableInfo *LineTable = nullptr;
+  std::unique_ptr<LineTableInfo> LineTable;
 
   /// These ivars serve as a cache used in the getLineNumber
   /// method which is used to speedup getLineNumber calls to nearby locations.
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index c7588aa796fe7..12b0305e707c9 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -329,7 +329,7 @@ void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
 
 LineTableInfo &SourceManager::getLineTable() {
   if (!LineTable)
-    LineTable = new LineTableInfo();
+    LineTable.reset(new LineTableInfo());
   return *LineTable;
 }
 
@@ -345,8 +345,6 @@ SourceManager::SourceManager(DiagnosticsEngine &Diag, FileManager &FileMgr,
 }
 
 SourceManager::~SourceManager() {
-  delete LineTable;
-
   // Delete FileEntry objects corresponding to content caches.  Since the actual
   // content cache objects are bump pointer allocated, we just have to run the
   // dtors, but we call the deallocate method for completeness.

From 8e7eee617a0915ca3153aed72da7c825f6263e25 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Mon, 15 Jul 2019 17:29:06 +0000
Subject: [PATCH 124/451] [ARM] Minor formatting in ARMInstrMVE.td. NFC

llvm-svn: 366089
---
 llvm/lib/Target/ARM/ARMInstrMVE.td | 68 +++++++++++++++---------------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 1091bcc9d1f02..1880daa629412 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -995,27 +995,27 @@ def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
 def MVE_VREV16_8  : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
 
 let Predicates = [HasMVEInt] in {
-def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
-          (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
-def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
-          (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
-def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
-          (v16i8 (MVE_VREV64_8  (v16i8 MQPR:$src)))>;
+  def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
+            (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
+  def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
+            (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
+  def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
+            (v16i8 (MVE_VREV64_8  (v16i8 MQPR:$src)))>;
 
-def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
-          (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
-def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
-          (v16i8 (MVE_VREV32_8  (v16i8 MQPR:$src)))>;
+  def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
+            (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
+  def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
+            (v16i8 (MVE_VREV32_8  (v16i8 MQPR:$src)))>;
 
-def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
-          (v16i8 (MVE_VREV16_8  (v16i8 MQPR:$src)))>;
+  def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
+            (v16i8 (MVE_VREV16_8  (v16i8 MQPR:$src)))>;
 
-def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
-          (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
-def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
-          (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
-def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
-          (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+  def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
+            (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
+  def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
+            (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
+  def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
+            (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
 }
 
 def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
@@ -2730,22 +2730,22 @@ def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
 def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
 
 let Predicates = [HasMVEFloat] in {
-def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
-          (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
-def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
-          (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
-def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
-          (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
-def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
-          (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
-def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
-          (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
-def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
-          (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
-def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
-          (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
-def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
-          (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
+  def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
+            (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
+  def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
+            (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
+  def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
+            (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
+  def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
+            (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
+  def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
+            (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
+  def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
+            (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
+  def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
+            (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
+  def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
+            (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
 }
 
 class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,

From 3dcd7996f1112e8f19f52a99ea7f2903572929dd Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <jdoerfert@anl.gov>
Date: Mon, 15 Jul 2019 17:31:26 +0000
Subject: [PATCH 125/451] [FunctionAttrs] Remove readonly and writeonly
 assertion

There are scenarios where mutually recursive functions may cause the SCC
to contain both read only and write only functions. This removes an
assertion when adding read attributes, which caused a crash with the
provided test case, and instead just doesn't add the attributes.

Patch by Luke Lau <luke.lau@intel.com>

Differential Revision: https://reviews.llvm.org/D60761

llvm-svn: 366090
---
 llvm/lib/Transforms/IPO/FunctionAttrs.cpp     |  7 +++++--
 .../FunctionAttrs/read-write-scc.ll           | 20 +++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/FunctionAttrs/read-write-scc.ll

diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index cfae267c2f64c..5ccd8bc4b0fbd 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -261,12 +261,15 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
     }
   }
 
+  // If the SCC contains both functions that read and functions that write, then
+  // we cannot add readonly attributes.
+  if (ReadsMemory && WritesMemory)
+    return false;
+
   // Success!  Functions in this SCC do not access memory, or only read memory.
   // Give them the appropriate attribute.
   bool MadeChange = false;
 
-  assert(!(ReadsMemory && WritesMemory) &&
-          "Function marked read-only and write-only");
   for (Function *F : SCCNodes) {
     if (F->doesNotAccessMemory())
       // Already perfect!
diff --git a/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll b/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
new file mode 100644
index 0000000000000..319aaf0136233
--- /dev/null
+++ b/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -functionattrs < %s | FileCheck %s
+; RUN: opt -S -passes=function-attrs < %s | FileCheck %s
+
+@i = global i32 0
+
+define void @foo() {
+; CHECK-LABEL: define void @foo() #0 {
+  store i32 1, i32* @i
+  call void @bar()
+  ret void
+}
+
+define void @bar() {
+; CHECK-LABEL: define void @bar() #0 {
+  %i = load i32, i32* @i
+  call void @foo()
+  ret void
+}
+
+; CHECK: attributes #0 = { nofree nounwind }

From 56799837a41b24884f2b4f4609f00d6e9dc5607e Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Mon, 15 Jul 2019 17:47:22 +0000
Subject: [PATCH 126/451] Update __VERSION__ to remove the hardcoded 4.2.1
 version

Summary:
Just like in https://reviews.llvm.org/D56803
for -dumpversion

Reviewers: rnk

Reviewed By: rnk

Subscribers: dexonsmith, lebedev.ri, hubert.reinterpretcast, xbolva00, fedor.sergeev, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D63048

llvm-svn: 366091
---
 clang/docs/LanguageExtensions.rst       | 2 ++
 clang/docs/ReleaseNotes.rst             | 6 ++++++
 clang/lib/Frontend/InitPreprocessor.cpp | 7 +++----
 clang/test/Preprocessor/init.c          | 4 ++--
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 266309c6ce248..44fa2b2ec0980 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -324,6 +324,8 @@ option for a warning and returns true if that is a valid warning option.
   ...
   #endif
 
+.. _languageextensions-builtin-macros:
+
 Builtin Macros
 ==============
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f89447fc96a3c..dadcc77f4803e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -56,6 +56,12 @@ Improvements to Clang's diagnostics
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
+- The ``__VERSION__`` macro has been updated.
+  Previously this macro contained the string '4.2.1 Compatible' to achieve
+  compatibility with GCC 4.2.1, but that should no longer be necessary.
+  However, to retrieve Clang's version, please favor one of the macros
+  defined in :ref:`clang namespaced version macros <languageextensions-builtin-macros>`.
+
 - ...
 
 
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 1741ba5e5203e..11ebab9454871 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -604,10 +604,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   // Support for #pragma redefine_extname (Sun compatibility)
   Builder.defineMacro("__PRAGMA_REDEFINE_EXTNAME", "1");
 
-  // As sad as it is, enough software depends on the __VERSION__ for version
-  // checks that it is necessary to report 4.2.1 (the base GCC version we claim
-  // compatibility with) first.
-  Builder.defineMacro("__VERSION__", "\"4.2.1 Compatible " +
+  // Previously this macro was set to a string aiming to achieve compatibility
+  // with GCC 4.2.1. Now, just return the full Clang version
+  Builder.defineMacro("__VERSION__", "\"" +
                       Twine(getClangFullCPPVersion()) + "\"");
 
   // Initialize language-specific preprocessor defines.
diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index 8df3b4bd2ccf4..fce85e05f63f5 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -8169,7 +8169,7 @@
 // SPARC:#define __UINT_LEAST8_MAX__ 255
 // SPARC:#define __UINT_LEAST8_TYPE__ unsigned char
 // SPARC:#define __USER_LABEL_PREFIX__
-// SPARC:#define __VERSION__ "4.2.1 Compatible{{.*}}
+// SPARC:#define __VERSION__ "Clang{{.*}}
 // SPARC:#define __WCHAR_MAX__ 2147483647
 // SPARC:#define __WCHAR_TYPE__ int
 // SPARC:#define __WCHAR_WIDTH__ 32
@@ -9041,7 +9041,7 @@
 // X86_64-CLOUDABI:#define __UINT_LEAST8_MAX__ 255
 // X86_64-CLOUDABI:#define __UINT_LEAST8_TYPE__ unsigned char
 // X86_64-CLOUDABI:#define __USER_LABEL_PREFIX__
-// X86_64-CLOUDABI:#define __VERSION__ "4.2.1 Compatible{{.*}}
+// X86_64-CLOUDABI:#define __VERSION__ "Clang{{.*}}
 // X86_64-CLOUDABI:#define __WCHAR_MAX__ 2147483647
 // X86_64-CLOUDABI:#define __WCHAR_TYPE__ int
 // X86_64-CLOUDABI:#define __WCHAR_WIDTH__ 32

From 7938424eb9287679623259d84c316cf5a96a8c83 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Mon, 15 Jul 2019 17:49:25 +0000
Subject: [PATCH 127/451] [AMDGPU] Copy missing predicate from pseudo to real

NFC at the moment; needed for a future commit.

Differential Revision: https://reviews.llvm.org/D64761

llvm-svn: 366092
---
 llvm/lib/Target/AMDGPU/DSInstructions.td | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e39f565fd225d..c52eaaa3fdc5f 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -61,6 +61,7 @@ class DS_Real <DS_Pseudo ds> :
 
   // copy relevant pseudo op flags
   let SubtargetPredicate = ds.SubtargetPredicate;
+  let OtherPredicates = ds.OtherPredicates;
   let AsmMatchConverter  = ds.AsmMatchConverter;
 
   // encoding fields

From 269e4e1b60127711cf6d8e76291342c907b95a8b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 17:50:28 +0000
Subject: [PATCH 128/451] Add some release notes for 9.0 release

llvm-svn: 366093
---
 llvm/docs/ReleaseNotes.rst | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index c3ce86bf615b2..ebf2c8db7baf8 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -76,6 +76,10 @@ Changes to the LLVM IR
   pointee type. In the next release we intend to make this parameter
   mandatory in preparation for opaque pointer types.
 
+* ``atomicrmw xchg`` now allows floating point types
+
+* ``atomicrmw`` now supports ``fadd`` and ``fsub``
+
 Changes to building LLVM
 ------------------------
 
@@ -107,7 +111,13 @@ Changes to the X86 Target
 Changes to the AMDGPU Target
 -----------------------------
 
- During this release ...
+* Function call support is now enabled by default
+
+* Improved support for 96-bit loads and stores
+
+* DPP combiner pass is now enabled by default
+
+* Support for gfx10
 
 Changes to the AVR Target
 -----------------------------

From 49169a963e842ac80829896522fa132327b37755 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 17:50:31 +0000
Subject: [PATCH 129/451] AMDGPU: Add 24-bit mul intrinsics

Insert these during codegenprepare.

This works around a DAG issue where generic combines eliminate the 'and'
asserting the high bits are zero, which then exposes an unknown read
source to the mul combine. It isn't worth the hassle of trying to
insert an AssertZext or something to try to deal with it.

llvm-svn: 366094
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  10 +
 .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp    | 127 +++++
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   5 +
 .../AMDGPU/amdgpu-codegenprepare-mul24.ll     | 494 ++++++++++++++++++
 .../CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll     |  14 +
 .../CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll     |  14 +
 llvm/test/CodeGen/AMDGPU/mad_uint24.ll        |  76 +++
 llvm/test/CodeGen/AMDGPU/mul.i16.ll           |  18 +-
 llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll |   4 +-
 9 files changed, 751 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 43e827ec6ab99..e92a6078ce479 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1350,6 +1350,16 @@ def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
+def int_amdgcn_mul_i24 : Intrinsic<[llvm_i32_ty],
+  [llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
+  [llvm_i32_ty, llvm_i32_ty],
+  [IntrNoMem, IntrSpeculatable]
+>;
+
 // llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
 //
 // bar_val is the total number of waves that will wait on this
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 759820753255a..b750c6b5f6d20 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -61,6 +61,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   AssumptionCache *AC = nullptr;
   LegacyDivergenceAnalysis *DA = nullptr;
   Module *Mod = nullptr;
+  const DataLayout *DL = nullptr;
   bool HasUnsafeFPMath = false;
 
   /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
@@ -133,6 +134,16 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
   /// \returns True.
   bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
 
+
+  unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
+  unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
+  bool isI24(Value *V, unsigned ScalarSize) const;
+  bool isU24(Value *V, unsigned ScalarSize) const;
+
+  /// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.i24.
+  /// DAG combines may eliminate the 'and' asserting the high bits are zero.
+  bool replaceMulWithMul24(BinaryOperator &I) const;
+
   /// Expands 24 bit div or rem.
   Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
                         Value *Num, Value *Den,
@@ -392,6 +403,118 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
   return true;
 }
 
+unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op,
+                                               unsigned ScalarSize) const {
+  KnownBits Known = computeKnownBits(Op, *DL, 0, AC);
+  return ScalarSize - Known.countMinLeadingZeros();
+}
+
+unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
+                                             unsigned ScalarSize) const {
+  // In order for this to be a signed 24-bit value, bit 23 must
+  // be a sign bit.
+  return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
+}
+
+bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
+  return ScalarSize >= 24 && // Types less than 24-bit should be treated
+                                     // as unsigned 24-bit values.
+    numBitsSigned(V, ScalarSize) < 24;
+}
+
+bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
+  return numBitsUnsigned(V, ScalarSize) <= 24;
+}
+
+static void extractValues(IRBuilder<> &Builder,
+                          SmallVectorImpl<Value *> &Values, Value *V) {
+  VectorType *VT = dyn_cast<VectorType>(V->getType());
+  if (!VT) {
+    Values.push_back(V);
+    return;
+  }
+
+  for (int I = 0, E = VT->getNumElements(); I != E; ++I)
+    Values.push_back(Builder.CreateExtractElement(V, I));
+}
+
+static Value *insertValues(IRBuilder<> &Builder,
+                           Type *Ty,
+                           SmallVectorImpl<Value *> &Values) {
+  if (Values.size() == 1)
+    return Values[0];
+
+  Value *NewVal = UndefValue::get(Ty);
+  for (int I = 0, E = Values.size(); I != E; ++I)
+    NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
+
+  return NewVal;
+}
+
+bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
+  if (I.getOpcode() != Instruction::Mul)
+    return false;
+
+  Type *Ty = I.getType();
+  unsigned Size = Ty->getScalarSizeInBits();
+  if (Size <= 16 && ST->has16BitInsts())
+    return false;
+
+  // Prefer scalar if this could be s_mul_i32
+  if (DA->isUniform(&I))
+    return false;
+
+  Value *LHS = I.getOperand(0);
+  Value *RHS = I.getOperand(1);
+  IRBuilder<> Builder(&I);
+  Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+  Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+
+  // TODO: Should this try to match mulhi24?
+  if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
+    IntrID = Intrinsic::amdgcn_mul_u24;
+  } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
+    IntrID = Intrinsic::amdgcn_mul_i24;
+  } else
+    return false;
+
+  SmallVector<Value *, 4> LHSVals;
+  SmallVector<Value *, 4> RHSVals;
+  SmallVector<Value *, 4> ResultVals;
+  extractValues(Builder, LHSVals, LHS);
+  extractValues(Builder, RHSVals, RHS);
+
+
+  IntegerType *I32Ty = Builder.getInt32Ty();
+  FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
+  for (int I = 0, E = LHSVals.size(); I != E; ++I) {
+    Value *LHS, *RHS;
+    if (IntrID == Intrinsic::amdgcn_mul_u24) {
+      LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
+      RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
+    } else {
+      LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
+      RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
+    }
+
+    Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});
+
+    if (IntrID == Intrinsic::amdgcn_mul_u24) {
+      ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
+                                                     LHSVals[I]->getType()));
+    } else {
+      ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
+                                                     LHSVals[I]->getType()));
+    }
+  }
+
+  I.replaceAllUsesWith(insertValues(Builder, Ty, ResultVals));
+  I.eraseFromParent();
+
+  return true;
+}
+
 static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv, bool HasDenormals) {
   const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
   if (!CNum)
@@ -756,6 +879,9 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
       DA->isUniform(&I) && promoteUniformOpToI32(I))
     return true;
 
+  if (replaceMulWithMul24(I))
+    return true;
+
   bool Changed = false;
   Instruction::BinaryOps Opc = I.getOpcode();
   Type *Ty = I.getType();
@@ -882,6 +1008,7 @@ bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
 
 bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
   Mod = &M;
+  DL = &Mod->getDataLayout();
   return false;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b90a0d28e9ef0..a3226577cd02b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5836,6 +5836,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_cos:
     return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
 
+  case Intrinsic::amdgcn_mul_u24:
+    return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, Op.getOperand(1), Op.getOperand(2));
+  case Intrinsic::amdgcn_mul_i24:
+    return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2));
+
   case Intrinsic::amdgcn_log_clamp: {
     if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return SDValue();
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
new file mode 100644
index 0000000000000..cda1da825f942
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -0,0 +1,494 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck -check-prefix=SI %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=fiji -amdgpu-codegenprepare %s | FileCheck -check-prefix=VI %s
+
+define i16 @mul_i16(i16 %lhs, i16 %rhs) {
+; SI-LABEL: @mul_i16(
+; SI-NEXT:    [[TMP1:%.*]] = zext i16 [[LHS:%.*]] to i32
+; SI-NEXT:    [[TMP2:%.*]] = zext i16 [[RHS:%.*]] to i32
+; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
+; SI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
+; SI-NEXT:    ret i16 [[TMP4]]
+;
+; VI-LABEL: @mul_i16(
+; VI-NEXT:    [[MUL:%.*]] = mul i16 [[LHS:%.*]], [[RHS:%.*]]
+; VI-NEXT:    ret i16 [[MUL]]
+;
+  %mul = mul i16 %lhs, %rhs
+  ret i16 %mul
+}
+
+define i32 @smul24_i32(i32 %lhs, i32 %rhs) {
+; SI-LABEL: @smul24_i32(
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
+; SI-NEXT:    [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
+; SI-NEXT:    [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 8
+; SI-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
+; SI-NEXT:    ret i32 [[TMP1]]
+;
+; VI-LABEL: @smul24_i32(
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 8
+; VI-NEXT:    [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 8
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 8
+; VI-NEXT:    [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 8
+; VI-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[LHS24]], i32 [[RHS24]])
+; VI-NEXT:    ret i32 [[TMP1]]
+;
+  %shl.lhs = shl i32 %lhs, 8
+  %lhs24 = ashr i32 %shl.lhs, 8
+  %shl.rhs = shl i32 %rhs, 8
+  %rhs24 = ashr i32 %shl.rhs, 8
+  %mul = mul i32 %lhs24, %rhs24
+  ret i32 %mul
+}
+
+define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; SI-LABEL: @smul24_v2i32(
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
+; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
+; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], <i32 8, i32 8>
+; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
+; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
+; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
+; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
+; SI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
+; SI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
+; SI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
+; SI-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
+; SI-NEXT:    ret <2 x i32> [[TMP8]]
+;
+; VI-LABEL: @smul24_v2i32(
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
+; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
+; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], <i32 8, i32 8>
+; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
+; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
+; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
+; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
+; VI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP3]])
+; VI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP2]], i32 [[TMP4]])
+; VI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
+; VI-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
+; VI-NEXT:    ret <2 x i32> [[TMP8]]
+;
+  %shl.lhs = shl <2 x i32> %lhs, <i32 8, i32 8>
+  %lhs24 = ashr <2 x i32> %shl.lhs, <i32 8, i32 8>
+  %shl.rhs = shl <2 x i32> %rhs, <i32 8, i32 8>
+  %rhs24 = ashr <2 x i32> %shl.rhs, <i32 8, i32 8>
+  %mul = mul <2 x i32> %lhs24, %rhs24
+  ret <2 x i32> %mul
+}
+
+define i32 @umul24_i32(i32 %lhs, i32 %rhs) {
+; SI-LABEL: @umul24_i32(
+; SI-NEXT:    [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
+; SI-NEXT:    [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
+; SI-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
+; SI-NEXT:    ret i32 [[TMP1]]
+;
+; VI-LABEL: @umul24_i32(
+; VI-NEXT:    [[LHS24:%.*]] = and i32 [[LHS:%.*]], 16777215
+; VI-NEXT:    [[RHS24:%.*]] = and i32 [[RHS:%.*]], 16777215
+; VI-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[LHS24]], i32 [[RHS24]])
+; VI-NEXT:    ret i32 [[TMP1]]
+;
+  %lhs24 = and i32 %lhs, 16777215
+  %rhs24 = and i32 %rhs, 16777215
+  %mul = mul i32 %lhs24, %rhs24
+  ret i32 %mul
+}
+
+define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; SI-LABEL: @umul24_v2i32(
+; SI-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
+; SI-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
+; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
+; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
+; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
+; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
+; SI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
+; SI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
+; SI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
+; SI-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
+; SI-NEXT:    ret <2 x i32> [[TMP8]]
+;
+; VI-LABEL: @umul24_v2i32(
+; VI-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
+; VI-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
+; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
+; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
+; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
+; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[RHS24]], i64 1
+; VI-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP3]])
+; VI-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP2]], i32 [[TMP4]])
+; VI-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i64 0
+; VI-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP6]], i64 1
+; VI-NEXT:    ret <2 x i32> [[TMP8]]
+;
+  %lhs24 = and <2 x i32> %lhs, <i32 16777215, i32 16777215>
+  %rhs24 = and <2 x i32> %rhs, <i32 16777215, i32 16777215>
+  %mul = mul <2 x i32> %lhs24, %rhs24
+  ret <2 x i32> %mul
+}
+
+define i64 @smul24_i64(i64 %lhs, i64 %rhs) {
+; SI-LABEL: @smul24_i64(
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
+; SI-NEXT:    [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
+; SI-NEXT:    [[RHS24:%.*]] = ashr i64 [[SHL_RHS]], 40
+; SI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
+; SI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
+; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
+; SI-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
+; SI-NEXT:    ret i64 [[TMP4]]
+;
+; VI-LABEL: @smul24_i64(
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl i64 [[LHS:%.*]], 40
+; VI-NEXT:    [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
+; VI-NEXT:    [[RHS24:%.*]] = ashr i64 [[SHL_RHS]], 40
+; VI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
+; VI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
+; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
+; VI-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
+; VI-NEXT:    ret i64 [[TMP4]]
+;
+  %shl.lhs = shl i64 %lhs, 40
+  %lhs24 = ashr i64 %shl.lhs, 40
+  %shl.rhs = shl i64 %rhs, 40
+  %rhs24 = ashr i64 %shl.rhs, 40
+  %mul = mul i64 %lhs24, %rhs24
+  ret i64 %mul
+}
+
+define i64 @umul24_i64(i64 %lhs, i64 %rhs) {
+; SI-LABEL: @umul24_i64(
+; SI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
+; SI-NEXT:    [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
+; SI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
+; SI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
+; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
+; SI-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; SI-NEXT:    ret i64 [[TMP4]]
+;
+; VI-LABEL: @umul24_i64(
+; VI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
+; VI-NEXT:    [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
+; VI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
+; VI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
+; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
+; VI-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; VI-NEXT:    ret i64 [[TMP4]]
+;
+  %lhs24 = and i64 %lhs, 16777215
+  %rhs24 = and i64 %rhs, 16777215
+  %mul = mul i64 %lhs24, %rhs24
+  ret i64 %mul
+}
+
+define i31 @smul24_i31(i31 %lhs, i31 %rhs) {
+; SI-LABEL: @smul24_i31(
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
+; SI-NEXT:    [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
+; SI-NEXT:    [[RHS24:%.*]] = ashr i31 [[SHL_RHS]], 7
+; SI-NEXT:    [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
+; SI-NEXT:    [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
+; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
+; SI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
+; SI-NEXT:    ret i31 [[TMP4]]
+;
+; VI-LABEL: @smul24_i31(
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl i31 [[LHS:%.*]], 7
+; VI-NEXT:    [[LHS24:%.*]] = ashr i31 [[SHL_LHS]], 7
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl i31 [[RHS:%.*]], 7
+; VI-NEXT:    [[RHS24:%.*]] = ashr i31 [[SHL_RHS]], 7
+; VI-NEXT:    [[TMP1:%.*]] = sext i31 [[LHS24]] to i32
+; VI-NEXT:    [[TMP2:%.*]] = sext i31 [[RHS24]] to i32
+; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
+; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
+; VI-NEXT:    ret i31 [[TMP4]]
+;
+  %shl.lhs = shl i31 %lhs, 7
+  %lhs24 = ashr i31 %shl.lhs, 7
+  %shl.rhs = shl i31 %rhs, 7
+  %rhs24 = ashr i31 %shl.rhs, 7
+  %mul = mul i31 %lhs24, %rhs24
+  ret i31 %mul
+}
+
+define i31 @umul24_i31(i31 %lhs, i31 %rhs) {
+; SI-LABEL: @umul24_i31(
+; SI-NEXT:    [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
+; SI-NEXT:    [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
+; SI-NEXT:    [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
+; SI-NEXT:    [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
+; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
+; SI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
+; SI-NEXT:    ret i31 [[TMP4]]
+;
+; VI-LABEL: @umul24_i31(
+; VI-NEXT:    [[LHS24:%.*]] = and i31 [[LHS:%.*]], 16777215
+; VI-NEXT:    [[RHS24:%.*]] = and i31 [[RHS:%.*]], 16777215
+; VI-NEXT:    [[TMP1:%.*]] = zext i31 [[LHS24]] to i32
+; VI-NEXT:    [[TMP2:%.*]] = zext i31 [[RHS24]] to i32
+; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
+; VI-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i31
+; VI-NEXT:    ret i31 [[TMP4]]
+;
+  %lhs24 = and i31 %lhs, 16777215
+  %rhs24 = and i31 %rhs, 16777215
+  %mul = mul i31 %lhs24, %rhs24
+  ret i31 %mul
+}
+
+define <2 x i31> @umul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
+; SI-LABEL: @umul24_v2i31(
+; SI-NEXT:    [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
+; SI-NEXT:    [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
+; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
+; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
+; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
+; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
+; SI-NEXT:    [[TMP5:%.*]] = zext i31 [[TMP1]] to i32
+; SI-NEXT:    [[TMP6:%.*]] = zext i31 [[TMP3]] to i32
+; SI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])
+; SI-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
+; SI-NEXT:    [[TMP9:%.*]] = zext i31 [[TMP2]] to i32
+; SI-NEXT:    [[TMP10:%.*]] = zext i31 [[TMP4]] to i32
+; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
+; SI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
+; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
+; SI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
+; SI-NEXT:    ret <2 x i31> [[TMP14]]
+;
+; VI-LABEL: @umul24_v2i31(
+; VI-NEXT:    [[LHS24:%.*]] = and <2 x i31> [[LHS:%.*]], <i31 16777215, i31 16777215>
+; VI-NEXT:    [[RHS24:%.*]] = and <2 x i31> [[RHS:%.*]], <i31 16777215, i31 16777215>
+; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
+; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
+; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
+; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
+; VI-NEXT:    [[TMP5:%.*]] = zext i31 [[TMP1]] to i32
+; VI-NEXT:    [[TMP6:%.*]] = zext i31 [[TMP3]] to i32
+; VI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP5]], i32 [[TMP6]])
+; VI-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
+; VI-NEXT:    [[TMP9:%.*]] = zext i31 [[TMP2]] to i32
+; VI-NEXT:    [[TMP10:%.*]] = zext i31 [[TMP4]] to i32
+; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP9]], i32 [[TMP10]])
+; VI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
+; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
+; VI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
+; VI-NEXT:    ret <2 x i31> [[TMP14]]
+;
+  %lhs24 = and <2 x i31> %lhs, <i31 16777215, i31 16777215>
+  %rhs24 = and <2 x i31> %rhs, <i31 16777215, i31 16777215>
+  %mul = mul <2 x i31> %lhs24, %rhs24
+  ret <2 x i31> %mul
+}
+
+define <2 x i31> @smul24_v2i31(<2 x i31> %lhs, <2 x i31> %rhs) {
+; SI-LABEL: @smul24_v2i31(
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>
+; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>
+; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], <i31 8, i31 8>
+; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
+; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
+; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
+; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
+; SI-NEXT:    [[TMP5:%.*]] = sext i31 [[TMP1]] to i32
+; SI-NEXT:    [[TMP6:%.*]] = sext i31 [[TMP3]] to i32
+; SI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
+; SI-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
+; SI-NEXT:    [[TMP9:%.*]] = sext i31 [[TMP2]] to i32
+; SI-NEXT:    [[TMP10:%.*]] = sext i31 [[TMP4]] to i32
+; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
+; SI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
+; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
+; SI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
+; SI-NEXT:    ret <2 x i31> [[TMP14]]
+;
+; VI-LABEL: @smul24_v2i31(
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i31> [[LHS:%.*]], <i31 8, i31 8>
+; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i31> [[SHL_LHS]], <i31 8, i31 8>
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i31> [[RHS:%.*]], <i31 8, i31 8>
+; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i31> [[SHL_RHS]], <i31 8, i31 8>
+; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i31> [[LHS24]], i64 0
+; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i31> [[LHS24]], i64 1
+; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i31> [[RHS24]], i64 0
+; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i31> [[RHS24]], i64 1
+; VI-NEXT:    [[TMP5:%.*]] = sext i31 [[TMP1]] to i32
+; VI-NEXT:    [[TMP6:%.*]] = sext i31 [[TMP3]] to i32
+; VI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
+; VI-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i31
+; VI-NEXT:    [[TMP9:%.*]] = sext i31 [[TMP2]] to i32
+; VI-NEXT:    [[TMP10:%.*]] = sext i31 [[TMP4]] to i32
+; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
+; VI-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i31
+; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i31> undef, i31 [[TMP8]], i64 0
+; VI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i31> [[TMP13]], i31 [[TMP12]], i64 1
+; VI-NEXT:    ret <2 x i31> [[TMP14]]
+;
+  %shl.lhs = shl <2 x i31> %lhs, <i31 8, i31 8>
+  %lhs24 = ashr <2 x i31> %shl.lhs, <i31 8, i31 8>
+  %shl.rhs = shl <2 x i31> %rhs, <i31 8, i31 8>
+  %rhs24 = ashr <2 x i31> %shl.rhs, <i31 8, i31 8>
+  %mul = mul <2 x i31> %lhs24, %rhs24
+  ret <2 x i31> %mul
+}
+
+define i33 @smul24_i33(i33 %lhs, i33 %rhs) {
+; SI-LABEL: @smul24_i33(
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
+; SI-NEXT:    [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
+; SI-NEXT:    [[RHS24:%.*]] = ashr i33 [[SHL_RHS]], 9
+; SI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
+; SI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
+; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
+; SI-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i33
+; SI-NEXT:    ret i33 [[TMP4]]
+;
+; VI-LABEL: @smul24_i33(
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl i33 [[LHS:%.*]], 9
+; VI-NEXT:    [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
+; VI-NEXT:    [[RHS24:%.*]] = ashr i33 [[SHL_RHS]], 9
+; VI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
+; VI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
+; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
+; VI-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i33
+; VI-NEXT:    ret i33 [[TMP4]]
+;
+  %shl.lhs = shl i33 %lhs, 9
+  %lhs24 = ashr i33 %shl.lhs, 9
+  %shl.rhs = shl i33 %rhs, 9
+  %rhs24 = ashr i33 %shl.rhs, 9
+  %mul = mul i33 %lhs24, %rhs24
+  ret i33 %mul
+}
+
+define i33 @umul24_i33(i33 %lhs, i33 %rhs) {
+; SI-LABEL: @umul24_i33(
+; SI-NEXT:    [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
+; SI-NEXT:    [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
+; SI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
+; SI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
+; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
+; SI-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i33
+; SI-NEXT:    ret i33 [[TMP4]]
+;
+; VI-LABEL: @umul24_i33(
+; VI-NEXT:    [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
+; VI-NEXT:    [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
+; VI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
+; VI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
+; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
+; VI-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i33
+; VI-NEXT:    ret i33 [[TMP4]]
+;
+  %lhs24 = and i33 %lhs, 16777215
+  %rhs24 = and i33 %rhs, 16777215
+  %mul = mul i33 %lhs24, %rhs24
+  ret i33 %mul
+}
+
+define i32 @smul25_i32(i32 %lhs, i32 %rhs) {
+; SI-LABEL: @smul25_i32(
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 7
+; SI-NEXT:    [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 7
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 7
+; SI-NEXT:    [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 7
+; SI-NEXT:    [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
+; SI-NEXT:    ret i32 [[MUL]]
+;
+; VI-LABEL: @smul25_i32(
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl i32 [[LHS:%.*]], 7
+; VI-NEXT:    [[LHS24:%.*]] = ashr i32 [[SHL_LHS]], 7
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl i32 [[RHS:%.*]], 7
+; VI-NEXT:    [[RHS24:%.*]] = ashr i32 [[SHL_RHS]], 7
+; VI-NEXT:    [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
+; VI-NEXT:    ret i32 [[MUL]]
+;
+  %shl.lhs = shl i32 %lhs, 7
+  %lhs24 = ashr i32 %shl.lhs, 7
+  %shl.rhs = shl i32 %rhs, 7
+  %rhs24 = ashr i32 %shl.rhs, 7
+  %mul = mul i32 %lhs24, %rhs24
+  ret i32 %mul
+}
+
+define i32 @umul25_i32(i32 %lhs, i32 %rhs) {
+; SI-LABEL: @umul25_i32(
+; SI-NEXT:    [[LHS24:%.*]] = and i32 [[LHS:%.*]], 33554431
+; SI-NEXT:    [[RHS24:%.*]] = and i32 [[RHS:%.*]], 33554431
+; SI-NEXT:    [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
+; SI-NEXT:    ret i32 [[MUL]]
+;
+; VI-LABEL: @umul25_i32(
+; VI-NEXT:    [[LHS24:%.*]] = and i32 [[LHS:%.*]], 33554431
+; VI-NEXT:    [[RHS24:%.*]] = and i32 [[RHS:%.*]], 33554431
+; VI-NEXT:    [[MUL:%.*]] = mul i32 [[LHS24]], [[RHS24]]
+; VI-NEXT:    ret i32 [[MUL]]
+;
+  %lhs24 = and i32 %lhs, 33554431
+  %rhs24 = and i32 %rhs, 33554431
+  %mul = mul i32 %lhs24, %rhs24
+  ret i32 %mul
+}
+
+define <2 x i33> @smul24_v2i33(<2 x i33> %lhs, <2 x i33> %rhs) {
+; SI-LABEL: @smul24_v2i33(
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
+; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
+; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], <i33 9, i33 9>
+; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
+; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
+; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
+; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
+; SI-NEXT:    [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
+; SI-NEXT:    [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
+; SI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
+; SI-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
+; SI-NEXT:    [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
+; SI-NEXT:    [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
+; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
+; SI-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
+; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
+; SI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
+; SI-NEXT:    ret <2 x i33> [[TMP14]]
+;
+; VI-LABEL: @smul24_v2i33(
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i33> [[LHS:%.*]], <i33 9, i33 9>
+; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
+; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i33> [[SHL_RHS]], <i33 9, i33 9>
+; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
+; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
+; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
+; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
+; VI-NEXT:    [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
+; VI-NEXT:    [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
+; VI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
+; VI-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
+; VI-NEXT:    [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
+; VI-NEXT:    [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
+; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
+; VI-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
+; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
+; VI-NEXT:    [[TMP14:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
+; VI-NEXT:    ret <2 x i33> [[TMP14]]
+;
+  %shl.lhs = shl <2 x i33> %lhs, <i33 9, i33 9>
+  %lhs24 = ashr <2 x i33> %shl.lhs, <i33 9, i33 9>
+  %shl.rhs = shl <2 x i33> %rhs, <i33 9, i33 9>
+  %rhs24 = ashr <2 x i33> %shl.rhs, <i33 9, i33 9>
+  %mul = mul <2 x i33> %lhs24, %rhs24
+  ret <2 x i33> %mul
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
new file mode 100644
index 0000000000000..a1dbe9a1322e8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test_mul_i24:
+; GCN: v_mul_i32_i24
+define amdgpu_kernel void @test_mul_i24(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #1 {
+  %val = call i32 @llvm.amdgcn.mul.i24(i32 %src1, i32 %src2) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.amdgcn.mul.i24(i32, i32) #0
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
new file mode 100644
index 0000000000000..810b50337e2b9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test_mul_u24:
+; GCN: v_mul_u32_u24
+define amdgpu_kernel void @test_mul_u24(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #1 {
+  %val = call i32 @llvm.amdgcn.mul.u24(i32 %src1, i32 %src2) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.amdgcn.mul.u24(i32, i32) #0
+
+attributes #0 = { nounwind readnone speculatable }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
index 5f109624dafa2..7c7b5925adfae 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
@@ -233,3 +233,79 @@ entry:
   store i64 %mad_ext, i64 addrspace(1)* %out
   ret void
 }
+
+; The ands are asserting the high bits are 0. SimplifyDemandedBits on
+; the adds would remove the ands before the target combine on the mul
+; had a chance to form mul24. The mul combine would then see
+; extractelement with no known bits and fail. All of the mul/add
+; combos in this loop should form v_mad_u32_u24.
+
+; FUNC-LABEL: {{^}}mad24_known_bits_destroyed:
+; GCN: v_mad_u32_u24
+; GCN: v_mad_u32_u24
+; GCN: v_mad_u32_u24
+; GCN: v_mad_u32_u24
+; GCN: v_mad_u32_u24
+; GCN: v_mad_u32_u24
+; GCN: v_mad_u32_u24
+; GCN: v_mad_u32_u24
+define void @mad24_known_bits_destroyed(i32 %arg, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 addrspace(1)* %arg7, <4 x i32> addrspace(1)* %arg8) #0 {
+bb:
+  %tmp = and i32 %arg4, 16777215
+  %tmp9 = extractelement <4 x i32> %arg1, i64 1
+  %tmp10 = extractelement <4 x i32> %arg3, i64 1
+  %tmp11 = and i32 %tmp9, 16777215
+  %tmp12 = extractelement <4 x i32> %arg1, i64 2
+  %tmp13 = extractelement <4 x i32> %arg3, i64 2
+  %tmp14 = and i32 %tmp12, 16777215
+  %tmp15 = extractelement <4 x i32> %arg1, i64 3
+  %tmp16 = extractelement <4 x i32> %arg3, i64 3
+  %tmp17 = and i32 %tmp15, 16777215
+  br label %bb19
+
+bb18:                                             ; preds = %bb19
+  ret void
+
+bb19:                                             ; preds = %bb19, %bb
+  %tmp20 = phi i32 [ %arg, %bb ], [ %tmp40, %bb19 ]
+  %tmp21 = phi i32 [ 0, %bb ], [ %tmp54, %bb19 ]
+  %tmp22 = phi <4 x i32> [ %arg2, %bb ], [ %tmp53, %bb19 ]
+  %tmp23 = and i32 %tmp20, 16777215
+  %tmp24 = mul i32 %tmp23, %tmp
+  %tmp25 = add i32 %tmp24, %arg5
+  %tmp26 = extractelement <4 x i32> %tmp22, i64 1
+  %tmp27 = and i32 %tmp26, 16777215
+  %tmp28 = mul i32 %tmp27, %tmp11
+  %tmp29 = add i32 %tmp28, %tmp10
+  %tmp30 = extractelement <4 x i32> %tmp22, i64 2
+  %tmp31 = and i32 %tmp30, 16777215
+  %tmp32 = mul i32 %tmp31, %tmp14
+  %tmp33 = add i32 %tmp32, %tmp13
+  %tmp34 = extractelement <4 x i32> %tmp22, i64 3
+  %tmp35 = and i32 %tmp34, 16777215
+  %tmp36 = mul i32 %tmp35, %tmp17
+  %tmp37 = add i32 %tmp36, %tmp16
+  %tmp38 = and i32 %tmp25, 16777215
+  %tmp39 = mul i32 %tmp38, %tmp
+  %tmp40 = add i32 %tmp39, %arg5
+  store i32 %tmp40, i32 addrspace(1)* %arg7
+  %tmp41 = insertelement <4 x i32> undef, i32 %tmp40, i32 0
+  %tmp42 = and i32 %tmp29, 16777215
+  %tmp43 = mul i32 %tmp42, %tmp11
+  %tmp44 = add i32 %tmp43, %tmp10
+  %tmp45 = insertelement <4 x i32> %tmp41, i32 %tmp44, i32 1
+  %tmp46 = and i32 %tmp33, 16777215
+  %tmp47 = mul i32 %tmp46, %tmp14
+  %tmp48 = add i32 %tmp47, %tmp13
+  %tmp49 = insertelement <4 x i32> %tmp45, i32 %tmp48, i32 2
+  %tmp50 = and i32 %tmp37, 16777215
+  %tmp51 = mul i32 %tmp50, %tmp17
+  %tmp52 = add i32 %tmp51, %tmp16
+  %tmp53 = insertelement <4 x i32> %tmp49, i32 %tmp52, i32 3
+  store <4 x i32> %tmp53, <4 x i32> addrspace(1)* %arg8
+  %tmp54 = add nuw nsw i32 %tmp21, 1
+  %tmp55 = icmp eq i32 %tmp54, %arg6
+  br i1 %tmp55, label %bb18, label %bb19
+}
+
+attributes #0 = { norecurse nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/mul.i16.ll b/llvm/test/CodeGen/AMDGPU/mul.i16.ll
index f67f17ad78a88..48619055c8ee5 100644
--- a/llvm/test/CodeGen/AMDGPU/mul.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul.i16.ll
@@ -41,8 +41,8 @@ entry:
 }
 
 ; GCN-LABEL: {{^}}v_mul_v2i16:
-; SI: v_mul_lo_u32
-; SI: v_mul_lo_u32
+; SI: v_mul_u32_u24
+; SI: v_mul_u32_u24
 
 ; VI: v_mul_lo_u16_sdwa
 ; VI: v_mul_lo_u16_e32
@@ -59,9 +59,9 @@ define <2 x i16> @v_mul_v2i16(<2 x i16> %a, <2 x i16> %b) {
 
 ; FIXME: Unpack garbage on gfx9
 ; GCN-LABEL: {{^}}v_mul_v3i16:
-; SI: v_mul_lo_u32
-; SI: v_mul_lo_u32
-; SI: v_mul_lo_u32
+; SI: v_mul_u32_u24
+; SI: v_mul_u32_u24
+; SI: v_mul_u32_u24
 
 ; VI: v_mul_lo_u16
 ; VI: v_mul_lo_u16
@@ -77,10 +77,10 @@ define <3 x i16> @v_mul_v3i16(<3 x i16> %a, <3 x i16> %b) {
 }
 
 ; GCN-LABEL: {{^}}v_mul_v4i16:
-; SI: v_mul_lo_u32
-; SI: v_mul_lo_u32
-; SI: v_mul_lo_u32
-; SI: v_mul_lo_u32
+; SI: v_mul_u32_u24
+; SI: v_mul_u32_u24
+; SI: v_mul_u32_u24
+; SI: v_mul_u32_u24
 
 ; VI: v_mul_lo_u16_sdwa
 ; VI: v_mul_lo_u16_e32
diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
index 019e2b51ce122..3ced470858057 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
@@ -249,8 +249,8 @@ entry:
 ; GCN-DAG: v_and_b32_e32 v1, [[U23_MASK]], v1
 ; GCN-DAG: v_mul_u32_u24_e32 v0, 0xea, v0
 ; GCN-DAG: v_mul_u32_u24_e32 v1, 0x39b, v1
-; GCN: v_and_b32_e32 v1, s4, v1
-; GCN: v_and_b32_e32 v0, 0x7ffffe, v0
+; GCN-DAG: v_and_b32_e32 v1, s4, v1
+; GCN-DAG: v_and_b32_e32 v0, 0x7ffffe, v0
 ; GCN: v_mul_u32_u24_e32 v0, v0, v1
 ; GCN: v_and_b32_e32 v0, 0x1fffe, v0
 ; GCN: v_mul_u32_u24_e32 v0, 0x63, v0

From 3e7c314b039d735adf760ddead7f6e32d4dc81be Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Mon, 15 Jul 2019 17:51:02 +0000
Subject: [PATCH 130/451] Reland "[COFF] Add null check in case of symbols
 defined in LTO blobs"

This reverts r365990 (git commit 1a6053ebc61cb0b8146f5ca27b74859a9a91e0a3)

The test no longer depends on the Visual C++ libraries. I confirmed that
the crash still reproduces with the new test case if I remove the null
check.

llvm-svn: 366095
---
 lld/COFF/SymbolTable.cpp                      |  2 +-
 .../COFF/Inputs/undefined-symbol-lto-a.ll     | 82 +++++++++++++++++++
 .../COFF/Inputs/undefined-symbol-lto-b.ll     | 29 +++++++
 lld/test/COFF/undefined-symbol-lto.test       | 29 +++++++
 4 files changed, 141 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
 create mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
 create mode 100644 lld/test/COFF/undefined-symbol-lto.test

diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 280a9c28892c8..2173c10c1ca56 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -69,7 +69,7 @@ static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) {
 
   for (Symbol *s : sc->file->getSymbols()) {
     auto *d = dyn_cast_or_null<DefinedRegular>(s);
-    if (!d || d->getChunk() != sc || d->getValue() > addr ||
+    if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr ||
         (candidate && d->getValue() < candidate->getValue()))
       continue;
 
diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
new file mode 100644
index 0000000000000..6793ec718e806
--- /dev/null
+++ b/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll
@@ -0,0 +1,82 @@
+; ModuleID = 't.obj'
+source_filename = "t.cpp"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.21.27702"
+
+%struct.Init = type { %struct.S }
+%struct.S = type { i32 (...)** }
+%rtti.CompleteObjectLocator = type { i32, i32, i32, i32, i32, i32 }
+%rtti.TypeDescriptor7 = type { i8**, i8*, [8 x i8] }
+%rtti.ClassHierarchyDescriptor = type { i32, i32, i32, i32 }
+%rtti.BaseClassDescriptor = type { i32, i32, i32, i32, i32, i32, i32 }
+
+$"??_SS@@6B@" = comdat largest
+
+$"??_R4S@@6B@" = comdat any
+
+$"??_R0?AUS@@@8" = comdat any
+
+$"??_R3S@@8" = comdat any
+
+$"??_R2S@@8" = comdat any
+
+$"??_R1A@?0A@EA@S@@8" = comdat any
+
+@"?d@@3UInit@@A" = dso_local local_unnamed_addr global %struct.Init zeroinitializer, align 8
+@anon.bcb2691509de99310dddb690fcdb4cdc.0 = private unnamed_addr constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i8*), i8* bitcast (void (%struct.S*)* @"?foo@S@@UEAAXXZ" to i8*)] }, comdat($"??_SS@@6B@"), !type !0
+@"??_R4S@@6B@" = linkonce_odr constant %rtti.CompleteObjectLocator { i32 1, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
+@"??_7type_info@@6B@" = external constant i8*
+@"??_R0?AUS@@@8" = linkonce_odr global %rtti.TypeDescriptor7 { i8** @"??_7type_info@@6B@", i8* null, [8 x i8] c".?AUS@@\00" }, comdat
+@__ImageBase = external dso_local constant i8
+@"??_R3S@@8" = linkonce_odr constant %rtti.ClassHierarchyDescriptor { i32 0, i32 0, i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint ([2 x i32]* @"??_R2S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
+@"??_R2S@@8" = linkonce_odr constant [2 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.BaseClassDescriptor* @"??_R1A@?0A@EA@S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0], comdat
+@"??_R1A@?0A@EA@S@@8" = linkonce_odr constant %rtti.BaseClassDescriptor { i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 0, i32 -1, i32 0, i32 64, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_t.cpp, i8* null }]
+
+@"??_SS@@6B@" = unnamed_addr alias i8*, getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @anon.bcb2691509de99310dddb690fcdb4cdc.0, i32 0, i32 0, i32 1)
+
+declare dso_local void @"?undefined_ref@@YAXXZ"() local_unnamed_addr #0
+
+declare dllimport void @"?foo@S@@UEAAXXZ"(%struct.S*) unnamed_addr #0
+
+; Function Attrs: nounwind sspstrong uwtable
+define internal void @_GLOBAL__sub_I_t.cpp() #1 {
+entry:
+  store i32 (...)** bitcast (i8** @"??_SS@@6B@" to i32 (...)**), i32 (...)*** getelementptr inbounds (%struct.Init, %struct.Init* @"?d@@3UInit@@A", i64 0, i32 0, i32 0), align 8
+  tail call void @"?undefined_ref@@YAXXZ"() #2
+  ret void
+}
+
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.linker.options = !{!1, !2}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = !{i64 8, !"?AUS@@"}
+!1 = !{!"/DEFAULTLIB:libcmt.lib"}
+!2 = !{!"/DEFAULTLIB:oldnames.lib"}
+!3 = !{i32 1, !"wchar_size", i32 2}
+!4 = !{i32 7, !"PIC Level", i32 2}
+!5 = !{i32 1, !"ThinLTO", i32 0}
+!6 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
+!7 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"}
+
+^0 = module: (path: "t.obj", hash: (0, 0, 0, 0, 0))
+^1 = gv: (name: "__ImageBase") ; guid = 434928772013489304
+^2 = gv: (name: "??_R2S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^6)))) ; guid = 2160898732728284029
+^3 = gv: (name: "llvm.global_ctors", summaries: (variable: (module: ^0, flags: (linkage: appending, notEligibleToImport: 1, live: 1, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^14)))) ; guid = 2412314959268824392
+^4 = gv: (name: "?foo@S@@UEAAXXZ") ; guid = 6578172636330484861
+^5 = gv: (name: "??_SS@@6B@", summaries: (alias: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), aliasee: ^10))) ; guid = 8774897714842691026
+^6 = gv: (name: "??_R1A@?0A@EA@S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^11, ^1, ^8)))) ; guid = 9397802696236423453
+^7 = gv: (name: "?undefined_ref@@YAXXZ") ; guid = 9774674600202276560
+^8 = gv: (name: "??_R3S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^2)))) ; guid = 10685958509605791599
+^9 = gv: (name: "??_7type_info@@6B@") ; guid = 10826752452437539368
+^10 = gv: (name: "anon.bcb2691509de99310dddb690fcdb4cdc.0", summaries: (variable: (module: ^0, flags: (linkage: private, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), vTableFuncs: ((virtFunc: ^4, offset: 8)), refs: (^13, ^4)))) ; guid = 11510395461204283992
+^11 = gv: (name: "??_R0?AUS@@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^9)))) ; guid = 12346607659584231960
+^12 = gv: (name: "?d@@3UInit@@A", summaries: (variable: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 1)))) ; guid = 14563354643524156382
+^13 = gv: (name: "??_R4S@@6B@", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^13, ^11, ^1, ^8)))) ; guid = 14703528065171087394
+^14 = gv: (name: "_GLOBAL__sub_I_t.cpp", summaries: (function: (module: ^0, flags: (linkage: internal, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 3, calls: ((callee: ^7)), refs: (^12, ^5)))) ; guid = 15085897428757412588
+^15 = typeidCompatibleVTable: (name: "?AUS@@", summary: ((offset: 8, ^10))) ; guid = 13986515119763165370
diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
new file mode 100644
index 0000000000000..ff73e7c6ba680
--- /dev/null
+++ b/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll
@@ -0,0 +1,29 @@
+; ModuleID = 'b.obj'
+source_filename = "b.cpp"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.21.27702"
+
+%struct.S = type { i32 (...)** }
+
+; Function Attrs: norecurse nounwind readnone sspstrong uwtable
+define dso_local void @"?foo@S@@UEAAXXZ"(%struct.S* nocapture %this) unnamed_addr #0 align 2 {
+entry:
+  ret void
+}
+
+attributes #0 = { norecurse nounwind readnone sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.linker.options = !{!0, !1}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = !{!"/DEFAULTLIB:libcmt.lib"}
+!1 = !{!"/DEFAULTLIB:oldnames.lib"}
+!2 = !{i32 1, !"wchar_size", i32 2}
+!3 = !{i32 7, !"PIC Level", i32 2}
+!4 = !{i32 1, !"ThinLTO", i32 0}
+!5 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
+!6 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"}
+
+^0 = module: (path: "b.obj", hash: (0, 0, 0, 0, 0))
+^1 = gv: (name: "?foo@S@@UEAAXXZ", summaries: (function: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0)))) ; guid = 6578172636330484861
diff --git a/lld/test/COFF/undefined-symbol-lto.test b/lld/test/COFF/undefined-symbol-lto.test
new file mode 100644
index 0000000000000..700ec650857ca
--- /dev/null
+++ b/lld/test/COFF/undefined-symbol-lto.test
@@ -0,0 +1,29 @@
+RUN: rm -rf %t && mkdir -p %t && cd %t
+RUN: llvm-as %S/Inputs/undefined-symbol-lto-a.ll -o t.obj
+RUN: llvm-as %S/Inputs/undefined-symbol-lto-b.ll -o b.obj
+RUN: llvm-lib b.obj -out:b.lib
+RUN: not lld-link -entry:main -nodefaultlib t.obj b.lib -subsystem:console 2>&1 | FileCheck %s
+
+CHECK: undefined symbol: main
+CHECK: undefined symbol: void __cdecl undefined_ref(void)
+CHECK: referenced by
+
+Originally reported as PR42536.
+
+a.ll corresponds to this C++:
+
+struct __declspec(dllimport) S {
+  virtual void foo();
+};
+void undefined_ref();
+struct Init {
+  Init() { undefined_ref(); }
+  S c;
+} d;
+
+b.ll is from this C++:
+
+struct S {
+  virtual void foo();
+};
+void S::foo() {}

From 81971b2b79c4ac7b01f4997176dae86befc55f37 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 15 Jul 2019 17:56:57 +0000
Subject: [PATCH 131/451] [X86] Return UNDEF from LowerScalarImmediateShift
 when the shift amount is out of range.

I think we only turn out of range shifts to undef when
all elements are out of range or the shift amount is a splat out
of range. I'm not sure which, I didn't check.

During lowering we can split a shift where some elements
are out of range into multiple shifts. This can create a
new shift with a splat shift amount that is out of range.

This patch returns undef for this case.

Fixes PR42615.

Differential Revision: https://reviews.llvm.org/D64699

llvm-svn: 366096
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c5bf3dcac45df..47389f2df32bd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25033,8 +25033,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
   APInt APIntShiftAmt;
   if (!isConstantSplat(Amt, APIntShiftAmt))
     return SDValue();
-  assert(APIntShiftAmt.ult(VT.getScalarSizeInBits()) &&
-         "Out of range shift amount");
+
+  // If the shift amount is out of range, return undef.
+  if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
+    return DAG.getUNDEF(VT);
+
   uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
 
   if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))

From 032e3c468fab3ffc033db219314c2151c6bda917 Mon Sep 17 00:00:00 2001
From: Stella Stamenova <stilis@microsoft.com>
Date: Mon, 15 Jul 2019 18:15:12 +0000
Subject: [PATCH 132/451] [llvm-lib] Add a dependency to intrinsics_gen to the
 LLVMLibDriver build

Summary:
Occasionally the build of LLVMLibDriver will fail because Attributes.inc has not been generated yet. Add an explicit dependency, so that we can guarantee that the file has been generated before LLVMLibDriver is built.

##[error]llvm\include\llvm\IR\Attributes.h(73,0): Error C1083: Cannot open include file: 'llvm/IR/Attributes.inc': No such file or directory
llvm\include\llvm/IR/Attributes.h(73): fatal error C1083: Cannot open include file: 'llvm/IR/Attributes.inc': No such file or directory [LLVMLibDriver.vcxproj]

Reviewers: asmith

Subscribers: mgorny, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64357

llvm-svn: 366097
---
 llvm/lib/ToolDrivers/llvm-lib/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/ToolDrivers/llvm-lib/CMakeLists.txt b/llvm/lib/ToolDrivers/llvm-lib/CMakeLists.txt
index cbcffb64e7d82..3d948aab8e31d 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/CMakeLists.txt
+++ b/llvm/lib/ToolDrivers/llvm-lib/CMakeLists.txt
@@ -12,5 +12,8 @@ add_public_tablegen_target(LibOptionsTableGen)
 
 add_llvm_library(LLVMLibDriver
   LibDriver.cpp
+
+  DEPENDS
+  intrinsics_gen
   )
 add_dependencies(LLVMLibDriver LibOptionsTableGen)

From eb99165b97b79c3ccc7b5ebcd445a98b4240e171 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 15 Jul 2019 18:17:23 +0000
Subject: [PATCH 133/451] [x86] try to keep FP casted+truncated+extracted
 vector element out of GPRs

inttofp (trunc (extelt X, 0)) --> inttofp (extelt (bitcast X), 0)

We have pseudo-vectorization of scalar int to FP casts, so this tries to
make that more likely by replacing a truncate with a bitcast. I didn't see
any test diffs starting from 'uitofp', so I left that as a TODO. We can't
only match the shorter trunc+extract pattern because there's an opposing
transform somewhere, so we infinite loop. Waiting to try this during
lowering is another possibility.

A motivating case is shown in PR39975 and included in the test diffs here:
https://bugs.llvm.org/show_bug.cgi?id=39975

Differential Revision: https://reviews.llvm.org/D64710

llvm-svn: 366098
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 39 +++++++++++++++++++
 llvm/test/CodeGen/X86/known-bits-vector.ll    |  5 +--
 .../test/CodeGen/X86/known-signbits-vector.ll | 29 +++++++-------
 3 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 47389f2df32bd..34a85806f563e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42430,6 +42430,41 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
   return SDValue();
 }
 
+/// If we are converting a value to floating-point, try to replace scalar
+/// truncate of an extracted vector element with a bitcast. This tries to keep
+/// the sequence on XMM registers rather than moving between vector and GPRs.
+static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) {
+  // TODO: This is currently only used by combineSIntToFP, but it is generalized
+  //       to allow being called by any similar cast opcode.
+  // TODO: Consider merging this into lowering: vectorizeExtractedCast().
+  SDValue Trunc = N->getOperand(0);
+  if (!Trunc.hasOneUse() || Trunc.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  SDValue ExtElt = Trunc.getOperand(0);
+  if (!ExtElt.hasOneUse() || ExtElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      !isNullConstant(ExtElt.getOperand(1)))
+    return SDValue();
+
+  EVT TruncVT = Trunc.getValueType();
+  EVT SrcVT = ExtElt.getValueType();
+  unsigned DestWidth = TruncVT.getSizeInBits();
+  unsigned SrcWidth = SrcVT.getSizeInBits();
+  if (SrcWidth % DestWidth != 0)
+    return SDValue();
+
+  // inttofp (trunc (extelt X, 0)) --> inttofp (extelt (bitcast X), 0)
+  EVT SrcVecVT = ExtElt.getOperand(0).getValueType();
+  unsigned VecWidth = SrcVecVT.getSizeInBits();
+  unsigned NumElts = VecWidth / DestWidth;
+  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, NumElts);
+  SDValue BitcastVec = DAG.getBitcast(BitcastVT, ExtElt.getOperand(0));
+  SDLoc DL(N);
+  SDValue NewExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TruncVT,
+                                  BitcastVec, ExtElt.getOperand(1));
+  return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt);
+}
+
 static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
   SDValue Op0 = N->getOperand(0);
@@ -42523,6 +42558,10 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
       return FILDChain;
     }
   }
+
+  if (SDValue V = combineToFPTruncExtElt(N, DAG))
+    return V;
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll
index a35cd8397395e..067ac9a6f7ef6 100644
--- a/llvm/test/CodeGen/X86/known-bits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-bits-vector.ll
@@ -33,9 +33,8 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
 ;
 ; X64-LABEL: knownbits_mask_extract_uitofp:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm0
+; X64-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
   %2 = extractelement <2 x i64> %1, i32 0
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index a6b993d722f1b..846d97d550736 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -67,9 +67,8 @@ define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
 ;
 ; X64-LABEL: signbits_ashr_extract_sitofp_0:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    shrq $32, %rax
-; X64-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm0
+; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr <2 x i64> %a0, <i64 32, i64 32>
   %2 = extractelement <2 x i64> %1, i32 0
@@ -90,9 +89,8 @@ define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
 ;
 ; X64-LABEL: signbits_ashr_extract_sitofp_1:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    shrq $32, %rax
-; X64-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm0
+; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr <2 x i64> %a0, <i64 32, i64 63>
   %2 = extractelement <2 x i64> %1, i32 0
@@ -115,10 +113,10 @@ define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
 ;
 ; X64-LABEL: signbits_ashr_shl_extract_sitofp:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    sarq $61, %rax
-; X64-NEXT:    shll $20, %eax
-; X64-NEXT:    vcvtsi2ss %eax, %xmm1, %xmm0
+; X64-NEXT:    vpsrad $29, %xmm0, %xmm0
+; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-NEXT:    vpsllq $20, %xmm0, %xmm0
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
   %2 = shl <2 x i64> %1, <i64 20, i64 16>
@@ -147,8 +145,9 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin
 ; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
 ; X64:       # %bb.0:
 ; X64-NEXT:    sarq $30, %rdi
-; X64-NEXT:    shrq $3, %rdi
-; X64-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
+; X64-NEXT:    vmovq %rdi, %xmm0
+; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr i64 %a0, 30
   %2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -234,8 +233,7 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2
 ; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; X64-NEXT:    vmovd %edi, %xmm1
 ; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    vcvtsi2ss %eax, %xmm2, %xmm0
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
   %2 = sext i32 %a2 to i64
@@ -280,8 +278,7 @@ define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4
 ; X64-NEXT:    vpand %xmm1, %xmm0, %xmm2
 ; X64-NEXT:    vpor %xmm1, %xmm2, %xmm1
 ; X64-NEXT:    vpxor %xmm0, %xmm1, %xmm0
-; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    vcvtsi2ss %eax, %xmm3, %xmm0
+; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
   %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>

From b390121efb39a1322c2bdd789f99c820ba4f4ad7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 18:18:46 +0000
Subject: [PATCH 134/451] AMDGPU/GlobalISel: Select llvm.amdgcn.end.cf

llvm-svn: 366099
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 14 +++++++
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |  5 +++
 .../GlobalISel/llvm.amdgcn.end.cf.i32.ll      | 39 +++++++++++++++++++
 .../GlobalISel/llvm.amdgcn.end.cf.i64.ll      | 36 +++++++++++++++++
 4 files changed, 94 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6b827db6faa80..6fa3e7baf237b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -655,6 +655,20 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     I.eraseFromParent();
     return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
   }
+  case Intrinsic::amdgcn_end_cf: {
+    // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
+    // SelectionDAG uses for wave32 vs wave64.
+    BuildMI(*BB, &I, I.getDebugLoc(),
+            TII.get(AMDGPU::SI_END_CF))
+      .add(I.getOperand(1));
+
+    Register Reg = I.getOperand(1).getReg();
+    I.eraseFromParent();
+
+    if (!MRI.getRegClassOrNull(Reg))
+      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
+    return true;
+  }
   default:
     return selectImpl(I, CoverageInfo);
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 95aa32eff98a8..2bbc259e8fcd0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2077,6 +2077,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
       break;
     }
+    case Intrinsic::amdgcn_end_cf: {
+      unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+      OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      break;
+    }
     }
     break;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
new file mode 100644
index 0000000000000..8689b650b8f25
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
+; GCN-LABEL: test_wave32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s1, s[4:5], 0x0
+; GCN-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GCN-NEXT:    ; implicit-def: $vcc_hi
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_eq_u32 s1, 0
+; GCN-NEXT:    s_cbranch_scc0 BB0_2
+; GCN-NEXT:  ; %bb.1: ; %mid
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:  BB0_2: ; %bb
+; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  br i1 %cond, label %mid, label %bb
+
+mid:
+  store volatile i32 0, i32 addrspace(1)* undef
+  br label %bb
+
+bb:
+  call void @llvm.amdgcn.end.cf.i32(i32 %saved)
+  store volatile i32 0, i32 addrspace(1)* undef
+  ret void
+}
+
+declare void @llvm.amdgcn.end.cf.i32(i32 %val)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
new file mode 100644
index 0000000000000..9e19eefab3b5e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) {
+; GCN-LABEL: test_wave64:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_eq_u32 s2, 0
+; GCN-NEXT:    s_cbranch_scc0 BB0_2
+; GCN-NEXT:  ; %bb.1: ; %mid
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:  BB0_2: ; %bb
+; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  br i1 %cond, label %mid, label %bb
+
+mid:
+  store volatile i32 0, i32 addrspace(1)* undef
+  br label %bb
+
+bb:
+  call void @llvm.amdgcn.end.cf.i64(i64 %saved)
+  store volatile i32 0, i32 addrspace(1)* undef
+  ret void
+}
+
+declare void @llvm.amdgcn.end.cf.i64(i64 %val)

From ff1c5288cb6a69e95797ca41af70da9ea0c2a3a4 Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
Date: Mon, 15 Jul 2019 18:22:06 +0000
Subject: [PATCH 135/451] AMDGPU: Remove reserved value accidentally left in
 for gfx908

llvm-svn: 366101
---
 llvm/include/llvm/BinaryFormat/ELF.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index c9cc8031569d3..2bd711137845e 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -711,8 +711,7 @@ enum : unsigned {
 
   // Reserved for AMDGCN-based processors.
   EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
-  EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x030,
-  EF_AMDGPU_MACH_AMDGCN_RESERVED2 = 0x032,
+  EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x032,
 
   // First/last AMDGCN-based processors.
   EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,

From 53fa759ff5a2ccc742c2d5138c4b1c04ead201ae Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 18:25:24 +0000
Subject: [PATCH 136/451] AMDGPU/GlobalISel: Handle llvm.amdgcn.if.break

llvm-svn: 366102
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 25 +++++++++++++++++
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |  7 +++++
 .../GlobalISel/llvm.amdgcn.if.break.i32.ll    | 27 +++++++++++++++++++
 .../GlobalISel/llvm.amdgcn.if.break.i64.ll    | 26 ++++++++++++++++++
 4 files changed, 85 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6fa3e7baf237b..317a9b5c08ccb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -475,6 +475,31 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(
   case Intrinsic::minnum:
   case Intrinsic::amdgcn_cvt_pkrtz:
     return selectImpl(I, CoverageInfo);
+  case Intrinsic::amdgcn_if_break: {
+    MachineBasicBlock *BB = I.getParent();
+    MachineFunction *MF = BB->getParent();
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+
+    // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
+    // SelectionDAG uses for wave32 vs wave64.
+    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
+      .add(I.getOperand(0))
+      .add(I.getOperand(2))
+      .add(I.getOperand(3));
+
+    Register DstReg = I.getOperand(0).getReg();
+    Register Src0Reg = I.getOperand(2).getReg();
+    Register Src1Reg = I.getOperand(3).getReg();
+
+    I.eraseFromParent();
+
+    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
+      if (!MRI.getRegClassOrNull(Reg))
+        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
+    }
+
+    return true;
+  }
   default:
     return selectImpl(I, CoverageInfo);
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 2bbc259e8fcd0..be05d9cb0ec6b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1985,6 +1985,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
       break;
     }
+    case Intrinsic::amdgcn_if_break: {
+      unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
+      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+      break;
+    }
     }
     break;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
new file mode 100644
index 0000000000000..282441a2a1d74
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
+; GCN-LABEL: test_wave32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GCN-NEXT:    s_load_dword s1, s[4:5], 0x24
+; GCN-NEXT:    ; implicit-def: $vcc_hi
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_eq_u32 s0, 0
+; GCN-NEXT:    s_cselect_b32 s0, -1, 0
+; GCN-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
+; GCN-NEXT:    s_or_b32 s0, s0, s1
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
+; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %break = call i32 @llvm.amdgcn.if.break.i32(i1 %cond, i32 %saved)
+  store volatile i32 %break, i32 addrspace(1)* undef
+  ret void
+}
+
+declare i32 @llvm.amdgcn.if.break.i32(i1, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
new file mode 100644
index 0000000000000..77d1374c86c17
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) {
+; GCN-LABEL: test_wave64:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0x0
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_eq_u32 s2, 0
+; GCN-NEXT:    s_cselect_b32 s2, -1, 0
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, s2
+; GCN-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    v_mov_b32_e32 v1, s1
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GCN-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %break = call i64 @llvm.amdgcn.if.break.i64(i1 %cond, i64 %saved)
+  store volatile i64 %break, i64 addrspace(1)* undef
+  ret void
+}
+
+declare i64 @llvm.amdgcn.if.break.i64(i1, i64)

From 90bdfb3daf7871ab324ff151b36cb0e783034706 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 18:31:10 +0000
Subject: [PATCH 137/451] AMDGPU/GlobalISel: Widen vector extracts

llvm-svn: 366103
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  13 +-
 .../legalize-extract-vector-elt.mir           | 366 ++++++++++++++++++
 2 files changed, 374 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index df58e7dbef040..090208e4c309c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -634,11 +634,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
 
     getActionDefinitionsBuilder(Op)
       .legalIf([=](const LegalityQuery &Query) {
-          const LLT &VecTy = Query.Types[VecTypeIdx];
-          const LLT &IdxTy = Query.Types[IdxTypeIdx];
-          return VecTy.getSizeInBits() % 32 == 0 &&
-            VecTy.getSizeInBits() <= 512 &&
-            IdxTy.getSizeInBits() == 32;
+          const LLT EltTy = Query.Types[EltTypeIdx];
+          const LLT VecTy = Query.Types[VecTypeIdx];
+          const LLT IdxTy = Query.Types[IdxTypeIdx];
+          return (EltTy.getSizeInBits() == 16 ||
+                  EltTy.getSizeInBits() % 32 == 0) &&
+                 VecTy.getSizeInBits() % 32 == 0 &&
+                 VecTy.getSizeInBits() <= 512 &&
+                 IdxTy.getSizeInBits() == 32;
         })
       .clampScalar(EltTypeIdx, S32, S64)
       .clampScalar(VecTypeIdx, S32, S64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
index cc8471a39c4d5..2fdde786b14dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -280,3 +280,369 @@ body: |
     %3:_(s32) = G_ANYEXT %2
     $vgpr0 = COPY %3
 ...
+
+---
+name: extract_vector_elt_v2s8_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2s8_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[COPY]](<2 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY3]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(<2 x s8>) = G_TRUNC %0
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v3s8_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: extract_vector_elt_v3s8_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[COPY1]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY3]](s32)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s32) = COPY $vgpr3
+    %2:_(<3 x s8>) = G_TRUNC %0
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v4s8_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+
+    ; CHECK-LABEL: name: extract_vector_elt_v4s8_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY [[COPY]](<4 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C]](s32)
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[UV3]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
+    ; CHECK: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32), [[ASHR3]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<4 x s32>), [[COPY1]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY3]](s32)
+    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(s32) = COPY $vgpr4
+    %2:_(<4 x s8>) = G_TRUNC %0
+    %3:_(s8) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v2s16_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2s16_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[COPY1]](s32)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_v2s16_idx0_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx0_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[C]](s32)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_v2s16_idx1_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx1_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[C]](s32)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_v2s16_idx2_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx2_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[C]](s32)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(s32) = G_CONSTANT i32 2
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_v3s16_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2, $vgpr3
+
+    ; CHECK-LABEL: name: extract_vector_elt_v3s16_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[COPY1]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY3]](s32)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s32) = COPY $vgpr3
+    %2:_(<3 x s16>) = G_TRUNC %0
+    %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v3s16_idx0_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx0_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C1]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(<3 x s16>) = G_TRUNC %0
+    %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v3s16_idx1_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx1_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C1]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(<3 x s16>) = G_TRUNC %0
+    %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v3s16_idx2_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx2_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C1]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s32) = G_CONSTANT i32 2
+    %2:_(<3 x s16>) = G_TRUNC %0
+    %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v3s16_idx3_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2
+
+    ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx3_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C1]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C1]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+    %1:_(s32) = G_CONSTANT i32 3
+    %2:_(<3 x s16>) = G_TRUNC %0
+    %3:_(s16) = G_EXTRACT_VECTOR_ELT %2, %1
+    %4:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+...
+
+---
+name: extract_vector_elt_v4s16_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: extract_vector_elt_v4s16_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[COPY1]](s32)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    %0:_(<4 x s16>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
+    %3:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+name: extract_vector_elt_v2s128_varidx_i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2s128_varidx_i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s128) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s128>), [[COPY1]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[EVEC]](s128)
+    %0:_(<2 x s128>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:_(s32) = COPY $vgpr8
+    %2:_(s128) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
+...

From b2a0745e2d5ee156e97a2e779cbf2abb3b2efb74 Mon Sep 17 00:00:00 2001
From: Wouter van Oortmerssen <aardappel@gmail.com>
Date: Mon, 15 Jul 2019 18:36:07 +0000
Subject: [PATCH 138/451] [WebAssembly] Assembler: recognize .init_array as
 data section.

Reviewers: sbc100

Subscribers: dschuff, jgravelle-google, aheejin, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64602

llvm-svn: 366104
---
 llvm/lib/MC/MCParser/WasmAsmParser.cpp    |  3 +++
 llvm/test/MC/WebAssembly/basic-assembly.s | 15 ++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index b19647793fac5..28d4459fecd44 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -120,6 +120,9 @@ class WasmAsmParser : public MCAsmParserExtension {
                     .StartsWith(".text", SectionKind::getText())
                     .StartsWith(".custom_section", SectionKind::getMetadata())
                     .StartsWith(".bss", SectionKind::getBSS())
+                    // See use of .init_array in WasmObjectWriter and
+                    // TargetLoweringObjectFileWasm
+                    .StartsWith(".init_array", SectionKind::getData())
                     .Default(Optional<SectionKind>());
     if (!Kind.hasValue())
       return Parser->Error(Lexer->getLoc(), "unknown section kind: " + Name);
diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s
index 2c396d12bf920..81d6001175b65 100644
--- a/llvm/test/MC/WebAssembly/basic-assembly.s
+++ b/llvm/test/MC/WebAssembly/basic-assembly.s
@@ -101,6 +101,10 @@ test0:
     .int32      2000000000
     .size       .L.str, 28
 
+    .section    .init_array.42,"",@
+    .p2align    2
+    .int32      test0
+
     .ident      "clang version 9.0.0 (trunk 364502) (llvm/trunk 364571)"
     .globaltype __stack_pointer, i32
 
@@ -190,8 +194,13 @@ test0:
 # CHECK-NEXT:  .L.str:
 # CHECK-NEXT:      .int8       72
 # CHECK-NEXT:      .asciz      "ello, World!"
-# CHECK-NEXT:      .int16       1234
-# CHECK-NEXT:      .int64       5000000000
-# CHECK-NEXT:      .int32       2000000000
+# CHECK-NEXT:      .int16      1234
+# CHECK-NEXT:      .int64      5000000000
+# CHECK-NEXT:      .int32      2000000000
+# CHECK-NEXT:      .size       .L.str, 28
+
+# CHECK:           .section    .init_array.42,"",@
+# CHECK-NEXT:      .p2align    2
+# CHECK-NEXT:      .int32      test0
 
 # CHECK:           .globaltype __stack_pointer, i32

From 4885978e23166045b3f6b48a124dffe0af9ef05c Mon Sep 17 00:00:00 2001
From: Matthew G McGovern <matthew.mcgovern@microsoft.com>
Date: Mon, 15 Jul 2019 18:42:14 +0000
Subject: [PATCH 139/451] [sanitizers][windows][mingw32] Mingw32 RTL fixes

RTL interception broke mingw32; this should fix those builds by removing the
dependency on windows.h.

reviewed in https://reviews.llvm.org/D64694

llvm-svn: 366105
---
 compiler-rt/lib/asan/asan_malloc_win.cc | 88 ++++++++++++-------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_malloc_win.cc b/compiler-rt/lib/asan/asan_malloc_win.cc
index 89d7003409261..ccbce061daf60 100644
--- a/compiler-rt/lib/asan/asan_malloc_win.cc
+++ b/compiler-rt/lib/asan/asan_malloc_win.cc
@@ -14,17 +14,12 @@
 #include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_platform.h"
 #if SANITIZER_WINDOWS
-// Need to include defintions for windows heap api functions,
-// these assume windows.h will also be included. This definition
-// fixes an error that's thrown if you only include heapapi.h
-#if defined(_M_IX86)
-#define _X86_
-#elif defined(_M_AMD64)
-#define _AMD64_
-#else
-#error "Missing arch or unsupported platform for Windows."
-#endif
-#include <heapapi.h>
+#include "asan_allocator.h"
+#include "asan_interceptors.h"
+#include "asan_internal.h"
+#include "asan_stack.h"
+#include "interception/interception.h"
+#include <stddef.h>
 
 // Intentionally not including windows.h here, to avoid the risk of
 // pulling in conflicting declarations of these functions. (With mingw-w64,
@@ -34,6 +29,9 @@ typedef void *HANDLE;
 typedef const void *LPCVOID;
 typedef void *LPVOID;
 
+typedef unsigned long DWORD;
+constexpr unsigned long HEAP_ZERO_MEMORY = 0x00000008;
+constexpr unsigned long HEAP_REALLOC_IN_PLACE_ONLY = 0x00000010;
 constexpr unsigned long HEAP_ALLOCATE_SUPPORTED_FLAGS = (HEAP_ZERO_MEMORY);
 constexpr unsigned long HEAP_ALLOCATE_UNSUPPORTED_FLAGS =
     (~HEAP_ALLOCATE_SUPPORTED_FLAGS);
@@ -45,13 +43,16 @@ constexpr unsigned long HEAP_REALLOC_SUPPORTED_FLAGS =
 constexpr unsigned long HEAP_REALLOC_UNSUPPORTED_FLAGS =
     (~HEAP_ALLOCATE_SUPPORTED_FLAGS);
 
-#include "asan_allocator.h"
-#include "asan_interceptors.h"
-#include "asan_internal.h"
-#include "asan_stack.h"
-#include "interception/interception.h"
 
-#include <stddef.h>
+extern "C" {
+LPVOID WINAPI HeapAlloc(HANDLE hHeap, DWORD dwFlags, size_t dwBytes);
+LPVOID WINAPI HeapReAlloc(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem,
+                         size_t dwBytes);
+BOOL WINAPI HeapFree(HANDLE hHeap, DWORD dwFlags, LPVOID lpMem);
+size_t WINAPI HeapSize(HANDLE hHeap, DWORD dwFlags, LPCVOID lpMem);
+
+BOOL WINAPI HeapValidate(HANDLE hHeap, DWORD dwFlags, LPCVOID lpMem);
+}
 
 using namespace __asan;  // NOLINT
 
@@ -160,7 +161,7 @@ void *_recalloc(void *p, size_t n, size_t elem_size) {
   size_t old_size = _msize(p);
   void *new_alloc = malloc(size);
   if (new_alloc) {
-    REAL(memcpy)(new_alloc, p, Min(size, old_size));
+    REAL(memcpy)(new_alloc, p, Min<size_t>(size, old_size));
     if (old_size < size)
       REAL(memset)(((u8 *)new_alloc) + old_size, 0, size - old_size);
     free(p);
@@ -206,7 +207,7 @@ int _CrtSetReportMode(int, int) {
 #define OWNED_BY_RTL(heap, memory) \
   (!__sanitizer_get_ownership(memory) && HeapValidate(heap, 0, memory))
 
-INTERCEPTOR_WINAPI(SIZE_T, HeapSize, HANDLE hHeap, DWORD dwFlags,
+INTERCEPTOR_WINAPI(size_t, HeapSize, HANDLE hHeap, DWORD dwFlags,
                    LPCVOID lpMem) {
   // If the RTL allocators are hooked we need to check whether the ASAN
   // allocator owns the pointer we're about to use. Allocations occur before
@@ -224,7 +225,7 @@ INTERCEPTOR_WINAPI(SIZE_T, HeapSize, HANDLE hHeap, DWORD dwFlags,
 }
 
 INTERCEPTOR_WINAPI(LPVOID, HeapAlloc, HANDLE hHeap, DWORD dwFlags,
-                   SIZE_T dwBytes) {
+                   size_t dwBytes) {
   // If the ASAN runtime is not initialized, or we encounter an unsupported
   // flag, fall back to the original allocator.
   if (flags()->windows_hook_rtl_allocators) {
@@ -269,14 +270,14 @@ INTERCEPTOR_WINAPI(BOOL, HeapFree, HANDLE hHeap, DWORD dwFlags, LPVOID lpMem) {
 }
 
 namespace __asan {
-using AllocFunction = LPVOID(WINAPI *)(HANDLE, DWORD, SIZE_T);
-using ReAllocFunction = LPVOID(WINAPI *)(HANDLE, DWORD, LPVOID, SIZE_T);
-using SizeFunction = SIZE_T(WINAPI *)(HANDLE, DWORD, LPVOID);
+using AllocFunction = LPVOID(WINAPI *)(HANDLE, DWORD, size_t);
+using ReAllocFunction = LPVOID(WINAPI *)(HANDLE, DWORD, LPVOID, size_t);
+using SizeFunction = size_t(WINAPI *)(HANDLE, DWORD, LPVOID);
 using FreeFunction = BOOL(WINAPI *)(HANDLE, DWORD, LPVOID);
 
 void *SharedReAlloc(ReAllocFunction reallocFunc, SizeFunction heapSizeFunc,
                     FreeFunction freeFunc, AllocFunction allocFunc,
-                    HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, SIZE_T dwBytes) {
+                    HANDLE hHeap, DWORD dwFlags, LPVOID lpMem, size_t dwBytes) {
   CHECK(reallocFunc && heapSizeFunc && freeFunc && allocFunc);
   GET_STACK_TRACE_MALLOC;
   GET_CURRENT_PC_BP_SP;
@@ -317,7 +318,7 @@ void *SharedReAlloc(ReAllocFunction reallocFunc, SizeFunction heapSizeFunc,
           replacement_alloc = asan_malloc(dwBytes, &stack);
         if (replacement_alloc) {
           size_t old_size = heapSizeFunc(hHeap, dwFlags, lpMem);
-          if (old_size == ((SIZE_T)0) - 1) {
+          if (old_size == ((size_t)0) - 1) {
             asan_free(replacement_alloc, &stack, FROM_MALLOC);
             return nullptr;
           }
@@ -341,7 +342,7 @@ void *SharedReAlloc(ReAllocFunction reallocFunc, SizeFunction heapSizeFunc,
       size_t old_usable_size = 0;
       if (replacement_alloc) {
         old_usable_size = asan_malloc_usable_size(lpMem, pc, bp);
-        REAL(memcpy)(replacement_alloc, lpMem, min(dwBytes, old_usable_size));
+        REAL(memcpy)(replacement_alloc, lpMem, Min<size_t>(dwBytes, old_usable_size));
         asan_free(lpMem, &stack, FROM_MALLOC);
       }
       return replacement_alloc;
@@ -388,7 +389,7 @@ void *SharedReAlloc(ReAllocFunction reallocFunc, SizeFunction heapSizeFunc,
 }  // namespace __asan
 
 INTERCEPTOR_WINAPI(LPVOID, HeapReAlloc, HANDLE hHeap, DWORD dwFlags,
-                   LPVOID lpMem, SIZE_T dwBytes) {
+                   LPVOID lpMem, size_t dwBytes) {
   return SharedReAlloc(REAL(HeapReAlloc), (SizeFunction)REAL(HeapSize),
                        REAL(HeapFree), REAL(HeapAlloc), hHeap, dwFlags, lpMem,
                        dwBytes);
@@ -399,28 +400,27 @@ INTERCEPTOR_WINAPI(LPVOID, HeapReAlloc, HANDLE hHeap, DWORD dwFlags,
 // allocations with detours and their definitions are unlikely to change.
 // Comments in /minkernel/ntos/rtl/heappublic.c indicate that these functions
 // are part of the heap's public interface.
-typedef ULONG LOGICAL;
+typedef unsigned long LOGICAL;
 
 // This function is documented as part of the Driver Development Kit but *not*
 // the Windows Development Kit.
-NTSYSAPI LOGICAL RtlFreeHeap(PVOID HeapHandle, ULONG Flags,
-                             _Frees_ptr_opt_ PVOID BaseAddress);
+LOGICAL RtlFreeHeap(void* HeapHandle, DWORD Flags,
+                            void* BaseAddress);
 
 // This function is documented as part of the Driver Development Kit but *not*
 // the Windows Development Kit.
-NTSYSAPI PVOID RtlAllocateHeap(PVOID HeapHandle, ULONG Flags, SIZE_T Size);
+void* RtlAllocateHeap(void* HeapHandle, DWORD Flags, size_t Size);
 
 // This function is completely undocumented.
-PVOID
-RtlReAllocateHeap(PVOID HeapHandle, ULONG Flags, PVOID BaseAddress,
-                  SIZE_T Size);
+void*
+RtlReAllocateHeap(void* HeapHandle, DWORD Flags, void* BaseAddress,
+                  size_t Size);
 
 // This function is completely undocumented.
-SIZE_T
-RtlSizeHeap(PVOID HeapHandle, ULONG Flags, PVOID BaseAddress);
+size_t RtlSizeHeap(void* HeapHandle, DWORD Flags, void* BaseAddress);
 
-INTERCEPTOR_WINAPI(SIZE_T, RtlSizeHeap, HANDLE HeapHandle, ULONG Flags,
-                   PVOID BaseAddress) {
+INTERCEPTOR_WINAPI(size_t, RtlSizeHeap, HANDLE HeapHandle, DWORD Flags,
+                   void* BaseAddress) {
   if (!flags()->windows_hook_rtl_allocators ||
       UNLIKELY(!asan_inited || OWNED_BY_RTL(HeapHandle, BaseAddress))) {
     return REAL(RtlSizeHeap)(HeapHandle, Flags, BaseAddress);
@@ -430,8 +430,8 @@ INTERCEPTOR_WINAPI(SIZE_T, RtlSizeHeap, HANDLE HeapHandle, ULONG Flags,
   return asan_malloc_usable_size(BaseAddress, pc, bp);
 }
 
-INTERCEPTOR_WINAPI(BOOL, RtlFreeHeap, HANDLE HeapHandle, ULONG Flags,
-                   PVOID BaseAddress) {
+INTERCEPTOR_WINAPI(BOOL, RtlFreeHeap, HANDLE HeapHandle, DWORD Flags,
+                   void* BaseAddress) {
   // Heap allocations happen before this function is hooked, so we must fall
   // back to the original function if the pointer is not from the ASAN heap, or
   // unsupported flags are provided.
@@ -445,8 +445,8 @@ INTERCEPTOR_WINAPI(BOOL, RtlFreeHeap, HANDLE HeapHandle, ULONG Flags,
   return true;
 }
 
-INTERCEPTOR_WINAPI(PVOID, RtlAllocateHeap, HANDLE HeapHandle, DWORD Flags,
-                   SIZE_T Size) {
+INTERCEPTOR_WINAPI(void*, RtlAllocateHeap, HANDLE HeapHandle, DWORD Flags,
+                   size_t Size) {
   // If the ASAN runtime is not initialized, or we encounter an unsupported
   // flag, fall back to the original allocator.
   if (!flags()->windows_hook_rtl_allocators ||
@@ -467,8 +467,8 @@ INTERCEPTOR_WINAPI(PVOID, RtlAllocateHeap, HANDLE HeapHandle, DWORD Flags,
   return p;
 }
 
-INTERCEPTOR_WINAPI(PVOID, RtlReAllocateHeap, HANDLE HeapHandle, ULONG Flags,
-                   PVOID BaseAddress, SIZE_T Size) {
+INTERCEPTOR_WINAPI(void*, RtlReAllocateHeap, HANDLE HeapHandle, DWORD Flags,
+                   void* BaseAddress, size_t Size) {
   // If it's actually a heap block which was allocated before the ASAN runtime
   // came up, use the real RtlFreeHeap function.
   if (!flags()->windows_hook_rtl_allocators)

From dc56995c57451368b4049738d4a56fa042db7a6e Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Mon, 15 Jul 2019 18:42:54 +0000
Subject: [PATCH 140/451] [ARM] MVE vector for 64bit types

We need to make sure that we are sensibly dealing with vectors of types v2i64
and v2f64, even if most of the time we cannot generate native operations for
them. This mostly adds a lot of testing, plus fixes up a couple of the issues
found. The bitwise AND, OR and XOR operations can be legal for v2i64, and the
shift combine needs a slight fixup so that it skips v2i64 under MVE.

Differential Revision: https://reviews.llvm.org/D64316

llvm-svn: 366106
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   6 +
 llvm/lib/Target/ARM/ARMInstrMVE.td            |  14 +-
 llvm/test/CodeGen/Thumb2/mve-abs.ll           |  47 ++++
 llvm/test/CodeGen/Thumb2/mve-bitarith.ll      |  65 +++++
 llvm/test/CodeGen/Thumb2/mve-div-expand.ll    | 194 ++++++++++++++
 llvm/test/CodeGen/Thumb2/mve-fmath.ll         | 250 ++++++++++++++++++
 llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll     |  57 ++++
 llvm/test/CodeGen/Thumb2/mve-frint.ll         | 150 +++++++++++
 llvm/test/CodeGen/Thumb2/mve-minmax.ll        | 215 +++++++++++++++
 llvm/test/CodeGen/Thumb2/mve-neg.ll           |  23 ++
 llvm/test/CodeGen/Thumb2/mve-sext.ll          |  47 ++++
 llvm/test/CodeGen/Thumb2/mve-shifts.ll        | 205 +++++++++++++-
 llvm/test/CodeGen/Thumb2/mve-shuffle.ll       | 146 +++++++++-
 llvm/test/CodeGen/Thumb2/mve-simple-arith.ll  | 166 ++++++++++++
 .../test/CodeGen/Thumb2/mve-soft-float-abi.ll |  68 +++++
 llvm/test/CodeGen/Thumb2/mve-vcvt.ll          | 108 ++++++++
 llvm/test/CodeGen/Thumb2/mve-vdup.ll          |  49 ++++
 llvm/test/CodeGen/Thumb2/mve-vmovimm.ll       |  89 +++++--
 llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll       |  16 +-
 19 files changed, 1875 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a67adde262d99..4eb8e0738a900 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -320,6 +320,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
   }
+  // We can do bitwise operations on v2i64 vectors
+  setOperationAction(ISD::AND, MVT::v2i64, Legal);
+  setOperationAction(ISD::OR, MVT::v2i64, Legal);
+  setOperationAction(ISD::XOR, MVT::v2i64, Legal);
 
   // It is legal to extload from v4i8 to v4i16 or v4i32.
   addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
@@ -12855,6 +12859,8 @@ static SDValue PerformShiftCombine(SDNode *N,
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!VT.isVector() || !TLI.isTypeLegal(VT))
     return SDValue();
+  if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
+    return SDValue();
 
   int64_t Cnt;
 
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 1880daa629412..3e7ae55c7fc8f 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1035,6 +1035,8 @@ let Predicates = [HasMVEInt] in {
             (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
   def : Pat<(v4i32 (vnotq  (v4i32 MQPR:$val1))),
             (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
+  def : Pat<(v2i64 (vnotq  (v2i64 MQPR:$val1))),
+            (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
 }
 
 class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
@@ -1081,6 +1083,8 @@ let Predicates = [HasMVEInt] in {
             (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
   def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
             (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+            (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
 
   def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
             (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1088,6 +1092,8 @@ let Predicates = [HasMVEInt] in {
             (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
   def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
             (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+            (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
 
   def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
             (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1095,6 +1101,8 @@ let Predicates = [HasMVEInt] in {
             (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
   def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
             (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
+            (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
 
   def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
             (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
@@ -1102,13 +1110,17 @@ let Predicates = [HasMVEInt] in {
             (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
   def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
             (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
 
-  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))),
+  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
             (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
   def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
             (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
   def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
             (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
+            (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
 }
 
 class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll
index e0af56abf123b..6e2100e2f4637 100644
--- a/llvm/test/CodeGen/Thumb2/mve-abs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll
@@ -36,3 +36,50 @@ entry:
   %2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1
   ret <4 x i32> %2
 }
+
+define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: abs_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    vmov r12, s2
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    vmov r3, s3
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    rsbs.w lr, r12, #0
+; CHECK-NEXT:    sbc.w r5, r0, r3
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mov r2, lr
+; CHECK-NEXT:    lsrl r2, r5, #32
+; CHECK-NEXT:    mov.w r5, #0
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r5, #1
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    moveq r2, r3
+; CHECK-NEXT:    vmov r3, s1
+; CHECK-NEXT:    rsbs r4, r1, #0
+; CHECK-NEXT:    mov r6, r4
+; CHECK-NEXT:    sbc.w r7, r0, r3
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    lsrl r6, r7, #32
+; CHECK-NEXT:    it mi
+; CHECK-NEXT:    movmi r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ite eq
+; CHECK-NEXT:    moveq r6, r3
+; CHECK-NEXT:    movne r1, r4
+; CHECK-NEXT:    vmov.32 q0[0], r1
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    vmov.32 q0[1], r6
+; CHECK-NEXT:    it eq
+; CHECK-NEXT:    moveq lr, r12
+; CHECK-NEXT:    vmov.32 q0[2], lr
+; CHECK-NEXT:    vmov.32 q0[3], r2
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %0 = icmp slt <2 x i64> %s1, zeroinitializer
+  %1 = sub nsw <2 x i64> zeroinitializer, %s1
+  %2 = select <2 x i1> %0, <2 x i64> %1, <2 x i64> %s1
+  ret <2 x i64> %2
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-bitarith.ll b/llvm/test/CodeGen/Thumb2/mve-bitarith.ll
index 1ee57124a604e..3098181692299 100644
--- a/llvm/test/CodeGen/Thumb2/mve-bitarith.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-bitarith.ll
@@ -31,6 +31,16 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @and_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: and_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <2 x i64> %src1, %src2
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: or_int8_t:
@@ -62,6 +72,16 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @or_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: or_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <2 x i64> %src1, %src2
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: xor_int8_t:
@@ -93,6 +113,16 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @xor_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: xor_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <2 x i64> %src1, %src2
+  ret <2 x i64> %0
+}
+
 define arm_aapcs_vfpcc <16 x i8> @v_mvn_i8(<16 x i8> %src) {
 ; CHECK-LABEL: v_mvn_i8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -123,6 +153,17 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @v_mvn_i64(<2 x i64> %src) {
+; CHECK-LABEL: v_mvn_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <2 x i64> %src, <i64 -1, i64 -1>
+  ret <2 x i64> %0
+}
+
+
 define arm_aapcs_vfpcc <16 x i8> @v_bic_i8(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: v_bic_i8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -156,6 +197,18 @@ entry:
   ret <4 x i32> %1
 }
 
+define arm_aapcs_vfpcc <2 x i64> @v_bic_i64(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: v_bic_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
+  %1 = and <2 x i64> %src2, %0
+  ret <2 x i64> %1
+}
+
+
 define arm_aapcs_vfpcc <16 x i8> @v_or_i8(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: v_or_i8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -188,3 +241,15 @@ entry:
   %1 = or <4 x i32> %src2, %0
   ret <4 x i32> %1
 }
+
+define arm_aapcs_vfpcc <2 x i64> @v_or_i64(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: v_or_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorn q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
+  %1 = or <2 x i64> %src2, %0
+  ret <2 x i64> %1
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
index 02f2225c17725..e0dddcd273c2f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll
@@ -736,6 +736,144 @@ entry:
   ret <16 x i8> %out
 }
 
+define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: udiv_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, s20
+; CHECK-NEXT:    vmov r1, s21
+; CHECK-NEXT:    vmov r2, s16
+; CHECK-NEXT:    vmov r3, s17
+; CHECK-NEXT:    bl __aeabi_uldivmod
+; CHECK-NEXT:    vmov r12, s22
+; CHECK-NEXT:    vmov lr, s23
+; CHECK-NEXT:    vmov r2, s18
+; CHECK-NEXT:    vmov r3, s19
+; CHECK-NEXT:    vmov.32 q4[0], r0
+; CHECK-NEXT:    lsrl r0, r1, #32
+; CHECK-NEXT:    vmov.32 q4[1], r0
+; CHECK-NEXT:    mov r0, r12
+; CHECK-NEXT:    mov r1, lr
+; CHECK-NEXT:    bl __aeabi_uldivmod
+; CHECK-NEXT:    vmov.32 q4[2], r0
+; CHECK-NEXT:    lsrl r0, r1, #32
+; CHECK-NEXT:    vmov.32 q4[3], r0
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = udiv <2 x i64> %in1, %in2
+  ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: sdiv_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, s20
+; CHECK-NEXT:    vmov r1, s21
+; CHECK-NEXT:    vmov r2, s16
+; CHECK-NEXT:    vmov r3, s17
+; CHECK-NEXT:    bl __aeabi_ldivmod
+; CHECK-NEXT:    vmov r12, s22
+; CHECK-NEXT:    vmov lr, s23
+; CHECK-NEXT:    vmov r2, s18
+; CHECK-NEXT:    vmov r3, s19
+; CHECK-NEXT:    vmov.32 q4[0], r0
+; CHECK-NEXT:    lsrl r0, r1, #32
+; CHECK-NEXT:    vmov.32 q4[1], r0
+; CHECK-NEXT:    mov r0, r12
+; CHECK-NEXT:    mov r1, lr
+; CHECK-NEXT:    bl __aeabi_ldivmod
+; CHECK-NEXT:    vmov.32 q4[2], r0
+; CHECK-NEXT:    lsrl r0, r1, #32
+; CHECK-NEXT:    vmov.32 q4[3], r0
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = sdiv <2 x i64> %in1, %in2
+  ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: urem_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, s20
+; CHECK-NEXT:    vmov r1, s21
+; CHECK-NEXT:    vmov r2, s16
+; CHECK-NEXT:    vmov r3, s17
+; CHECK-NEXT:    bl __aeabi_uldivmod
+; CHECK-NEXT:    vmov r12, s18
+; CHECK-NEXT:    vmov lr, s19
+; CHECK-NEXT:    vmov.32 q4[0], r2
+; CHECK-NEXT:    vmov r0, s22
+; CHECK-NEXT:    vmov.32 q4[1], r3
+; CHECK-NEXT:    vmov r1, s23
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    mov r3, lr
+; CHECK-NEXT:    bl __aeabi_uldivmod
+; CHECK-NEXT:    vmov.32 q4[2], r2
+; CHECK-NEXT:    vmov.32 q4[3], r3
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = urem <2 x i64> %in1, %in2
+  ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) {
+; CHECK-LABEL: srem_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, s20
+; CHECK-NEXT:    vmov r1, s21
+; CHECK-NEXT:    vmov r2, s16
+; CHECK-NEXT:    vmov r3, s17
+; CHECK-NEXT:    bl __aeabi_ldivmod
+; CHECK-NEXT:    vmov r12, s18
+; CHECK-NEXT:    vmov lr, s19
+; CHECK-NEXT:    vmov.32 q4[0], r2
+; CHECK-NEXT:    vmov r0, s22
+; CHECK-NEXT:    vmov.32 q4[1], r3
+; CHECK-NEXT:    vmov r1, s23
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    mov r3, lr
+; CHECK-NEXT:    bl __aeabi_ldivmod
+; CHECK-NEXT:    vmov.32 q4[2], r2
+; CHECK-NEXT:    vmov.32 q4[3], r3
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = srem <2 x i64> %in1, %in2
+  ret <2 x i64> %out
+}
+
+
+
 
 define arm_aapcs_vfpcc <4 x float> @fdiv_f32(<4 x float> %in1, <4 x float> %in2) {
 ; CHECK-LABEL: fdiv_f32:
@@ -992,3 +1130,59 @@ entry:
   %out = frem <8 x half> %in1, %in2
   ret <8 x half> %out
 }
+
+define arm_aapcs_vfpcc <2 x double> @fdiv_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: fdiv_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, r1, d11
+; CHECK-NEXT:    vmov r2, r3, d9
+; CHECK-NEXT:    bl __aeabi_ddiv
+; CHECK-NEXT:    vmov lr, r12, d10
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    bl __aeabi_ddiv
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = fdiv <2 x double> %in1, %in2
+  ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @frem_f64(<2 x double> %in1, <2 x double> %in2) {
+; CHECK-LABEL: frem_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, r1, d11
+; CHECK-NEXT:    vmov r2, r3, d9
+; CHECK-NEXT:    bl fmod
+; CHECK-NEXT:    vmov lr, r12, d10
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    bl fmod
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = frem <2 x double> %in1, %in2
+  ret <2 x double> %out
+}
+
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
index 41054e2d34d14..31956979dd99a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll
@@ -66,6 +66,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) {
+; CHECK-LABEL: sqrt_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl sqrt
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl sqrt
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
 ; CHECK-LABEL: cos_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -198,6 +222,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) {
+; CHECK-LABEL: cos_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl cos
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl cos
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
 ; CHECK-LABEL: sin_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -330,6 +378,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) {
+; CHECK-LABEL: sin_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl sin
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl sin
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
 ; CHECK-LABEL: exp_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -462,6 +534,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) {
+; CHECK-LABEL: exp_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl exp
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl exp
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
 ; CHECK-LABEL: exp2_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -594,6 +690,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) {
+; CHECK-LABEL: exp2_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl exp2
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl exp2
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
 ; CHECK-LABEL: log_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -726,6 +846,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl log
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl log
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
 ; CHECK-LABEL: log2_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -858,6 +1002,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log2_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl log2
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl log2
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
 ; CHECK-LABEL: log10_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -990,6 +1158,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) {
+; CHECK-LABEL: log10_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl log10
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl log10
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-LABEL: pow_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -1165,6 +1357,33 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: pow_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, r1, d11
+; CHECK-NEXT:    vmov r2, r3, d9
+; CHECK-NEXT:    bl pow
+; CHECK-NEXT:    vmov lr, r12, d10
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    bl pow
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-LABEL: copysign_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -1340,6 +1559,27 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: copysign_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov r0, r1, d3
+; CHECK-NEXT:    vmov r0, lr, d2
+; CHECK-NEXT:    vmov r0, r3, d1
+; CHECK-NEXT:    vmov r12, r2, d0
+; CHECK-NEXT:    lsrs r1, r1, #31
+; CHECK-NEXT:    bfi r3, r1, #31, #1
+; CHECK-NEXT:    lsr.w r1, lr, #31
+; CHECK-NEXT:    bfi r2, r1, #31, #1
+; CHECK-NEXT:    vmov d1, r0, r3
+; CHECK-NEXT:    vmov d0, r12, r2
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
+  ret <2 x double> %0
+}
+
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
 declare <4 x float> @llvm.cos.v4f32(<4 x float>)
 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
@@ -1360,4 +1600,14 @@ declare <8 x half> @llvm.log2.v8f16(<8 x half>)
 declare <8 x half> @llvm.log10.v8f16(<8 x half>)
 declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
 declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <2 x double> @llvm.cos.v2f64(<2 x double>)
+declare <2 x double> @llvm.sin.v2f64(<2 x double>)
+declare <2 x double> @llvm.exp.v2f64(<2 x double>)
+declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
+declare <2 x double> @llvm.log.v2f64(<2 x double>)
+declare <2 x double> @llvm.log2.v2f64(<2 x double>)
+declare <2 x double> @llvm.log10.v2f64(<2 x double>)
+declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
 
diff --git a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
index 2489646ad6d62..d1fd4b2676289 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
@@ -76,6 +76,39 @@ entry:
   ret <4 x float> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fneg_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vldr d0, .LCPI2_0
+; CHECK-NEXT:    vmov r2, r3, d9
+; CHECK-NEXT:    vmov r4, r5, d0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_dsub
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    mov r1, r5
+; CHECK-NEXT:    bl __aeabi_dsub
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    .p2align 3
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI2_0:
+; CHECK-NEXT:    .long 0 @ double -0
+; CHECK-NEXT:    .long 2147483648
+entry:
+  %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <8 x half> @fabs_float16_t(<8 x half> %src) {
 ; CHECK-MVE-LABEL: fabs_float16_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
@@ -150,6 +183,30 @@ entry:
   ret <4 x float> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @fabs_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fabs_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr d2, .LCPI5_0
+; CHECK-NEXT:    vmov r12, r3, d0
+; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov r0, r2, d1
+; CHECK-NEXT:    lsrs r1, r1, #31
+; CHECK-NEXT:    bfi r2, r1, #31, #1
+; CHECK-NEXT:    bfi r3, r1, #31, #1
+; CHECK-NEXT:    vmov d1, r0, r2
+; CHECK-NEXT:    vmov d0, r12, r3
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 3
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI5_0:
+; CHECK-NEXT:    .long 0 @ double 0
+; CHECK-NEXT:    .long 0
+entry:
+  %0 = call nnan ninf nsz <2 x double> @llvm.fabs.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
 
diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll
index 847d7ede1d73c..e1758d5ed3b27 100644
--- a/llvm/test/CodeGen/Thumb2/mve-frint.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll
@@ -76,6 +76,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fceil_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl ceil
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl ceil
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) {
 ; CHECK-MVE-LABEL: ftrunc_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
@@ -150,6 +174,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) {
+; CHECK-LABEL: ftrunc_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl trunc
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl trunc
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) {
 ; CHECK-MVE-LABEL: frint_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
@@ -224,6 +272,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) {
+; CHECK-LABEL: frint_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl rint
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl rint
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) {
 ; CHECK-LABEL: fnearbyint_float32_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -288,6 +360,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fnearbyint_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl nearbyint
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl nearbyint
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) {
 ; CHECK-MVE-LABEL: ffloor_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
@@ -362,6 +458,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) {
+; CHECK-LABEL: ffloor_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl floor
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl floor
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) {
 ; CHECK-MVE-LABEL: fround_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
@@ -436,6 +556,30 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) {
+; CHECK-LABEL: fround_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl round
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl round
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src)
+  ret <2 x double> %0
+}
+
 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
@@ -448,3 +592,9 @@ declare <8 x half> @llvm.rint.v8f16(<8 x half>)
 declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
 declare <8 x half> @llvm.floor.v8f16(<8 x half>)
 declare <8 x half> @llvm.round.v8f16(<8 x half>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
index 38990d3571744..0b6308f880085 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll
@@ -35,6 +35,49 @@ entry:
   ret <4 x i32> %1
 }
 
+define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: smin_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov r2, s6
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    vmov r3, s2
+; CHECK-NEXT:    vmov r12, s7
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    vmov lr, s1
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    vmov r3, s0
+; CHECK-NEXT:    vmov r2, s4
+; CHECK-NEXT:    sbcs.w r1, r1, r12
+; CHECK-NEXT:    vmov r12, s5
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    sbcs.w r2, lr, r12
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q2[0], r0
+; CHECK-NEXT:    vmov.32 q2[1], r0
+; CHECK-NEXT:    vmov.32 q2[2], r1
+; CHECK-NEXT:    vmov.32 q2[3], r1
+; CHECK-NEXT:    vbic q1, q1, q2
+; CHECK-NEXT:    vand q0, q0, q2
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = icmp slt <2 x i64> %s1, %s2
+  %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+  ret <2 x i64> %1
+}
+
 define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
 ; CHECK-LABEL: umin_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -68,6 +111,49 @@ entry:
   ret <4 x i32> %1
 }
 
+define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: umin_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov r2, s6
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    vmov r3, s2
+; CHECK-NEXT:    vmov r12, s7
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    vmov lr, s1
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    vmov r3, s0
+; CHECK-NEXT:    vmov r2, s4
+; CHECK-NEXT:    sbcs.w r1, r1, r12
+; CHECK-NEXT:    vmov r12, s5
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    it lo
+; CHECK-NEXT:    movlo r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    sbcs.w r2, lr, r12
+; CHECK-NEXT:    it lo
+; CHECK-NEXT:    movlo r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q2[0], r0
+; CHECK-NEXT:    vmov.32 q2[1], r0
+; CHECK-NEXT:    vmov.32 q2[2], r1
+; CHECK-NEXT:    vmov.32 q2[3], r1
+; CHECK-NEXT:    vbic q1, q1, q2
+; CHECK-NEXT:    vand q0, q0, q2
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = icmp ult <2 x i64> %s1, %s2
+  %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+  ret <2 x i64> %1
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
 ; CHECK-LABEL: smax_v16i8:
@@ -102,6 +188,49 @@ entry:
   ret <4 x i32> %1
 }
 
+define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: smax_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    vmov r12, s3
+; CHECK-NEXT:    vmov r1, s7
+; CHECK-NEXT:    vmov lr, s5
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    sbcs.w r1, r1, r12
+; CHECK-NEXT:    vmov r12, s1
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    sbcs.w r2, lr, r12
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q2[0], r0
+; CHECK-NEXT:    vmov.32 q2[1], r0
+; CHECK-NEXT:    vmov.32 q2[2], r1
+; CHECK-NEXT:    vmov.32 q2[3], r1
+; CHECK-NEXT:    vbic q1, q1, q2
+; CHECK-NEXT:    vand q0, q0, q2
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = icmp sgt <2 x i64> %s1, %s2
+  %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+  ret <2 x i64> %1
+}
+
 define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) {
 ; CHECK-LABEL: umax_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
@@ -135,6 +264,49 @@ entry:
   ret <4 x i32> %1
 }
 
+define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) {
+; CHECK-LABEL: umax_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    vmov r12, s3
+; CHECK-NEXT:    vmov r1, s7
+; CHECK-NEXT:    vmov lr, s5
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    vmov r3, s4
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    sbcs.w r1, r1, r12
+; CHECK-NEXT:    vmov r12, s1
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    it lo
+; CHECK-NEXT:    movlo r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    sbcs.w r2, lr, r12
+; CHECK-NEXT:    it lo
+; CHECK-NEXT:    movlo r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q2[0], r0
+; CHECK-NEXT:    vmov.32 q2[1], r0
+; CHECK-NEXT:    vmov.32 q2[2], r1
+; CHECK-NEXT:    vmov.32 q2[3], r1
+; CHECK-NEXT:    vbic q1, q1, q2
+; CHECK-NEXT:    vand q0, q0, q2
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = icmp ugt <2 x i64> %s1, %s2
+  %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2
+  ret <2 x i64> %1
+}
+
 
 define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-MVE-LABEL: maxnm_float32_t:
@@ -227,3 +399,46 @@ entry:
   %0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2
   ret <8 x half> %0
 }
+
+define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: maxnm_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r2, r3, d11
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    vmov r0, r1, d8
+; CHECK-NEXT:    vmov r2, r3, d10
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r4, #-1
+; CHECK-NEXT:    bl __aeabi_dcmpgt
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q0[0], r0
+; CHECK-NEXT:    vmov.32 q0[1], r0
+; CHECK-NEXT:    vmov.32 q0[2], r4
+; CHECK-NEXT:    vmov.32 q0[3], r4
+; CHECK-NEXT:    vbic q1, q5, q0
+; CHECK-NEXT:    vand q0, q4, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r4, pc}
+entry:
+  %cmp = fcmp fast ogt <2 x double> %src2, %src1
+  %0 = select <2 x i1> %cmp, <2 x double> %src2, <2 x double> %src1
+  ret <2 x double> %0
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-neg.ll b/llvm/test/CodeGen/Thumb2/mve-neg.ll
index f1c4352e3edb0..602ce3d5f9be6 100644
--- a/llvm/test/CodeGen/Thumb2/mve-neg.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-neg.ll
@@ -30,3 +30,26 @@ entry:
   %0 = sub nsw <4 x i32> zeroinitializer, %s1
   ret <4 x i32> %0
 }
+
+define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: neg_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    mov.w r12, #0
+; CHECK-NEXT:    vmov r0, s3
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    vmov r3, s1
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    sbc.w r0, r12, r0
+; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    sbc.w r3, r12, r3
+; CHECK-NEXT:    vmov.32 q0[0], r2
+; CHECK-NEXT:    vmov.32 q0[1], r3
+; CHECK-NEXT:    vmov.32 q0[2], r1
+; CHECK-NEXT:    vmov.32 q0[3], r0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sub nsw <2 x i64> zeroinitializer, %s1
+  ret <2 x i64> %0
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-sext.ll b/llvm/test/CodeGen/Thumb2/mve-sext.ll
index 9458fdc47e582..452e68405feb3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-sext.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-sext.ll
@@ -32,6 +32,24 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) {
+; CHECK-LABEL: sext_v2i32_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    vmov.32 q1[0], r0
+; CHECK-NEXT:    asrs r0, r0, #31
+; CHECK-NEXT:    vmov.32 q1[1], r0
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmov.32 q1[2], r0
+; CHECK-NEXT:    asrs r0, r0, #31
+; CHECK-NEXT:    vmov.32 q1[3], r0
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = sext <2 x i32> %src to <2 x i64>
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {
 ; CHECK-LABEL: zext_v8i8_v8i16:
@@ -64,6 +82,25 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
+; CHECK-LABEL: zext_v2i32_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    adr r0, .LCPI7_0
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI7_0:
+; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
+; CHECK-NEXT:    .long 0 @ 0x0
+entry:
+  %0 = zext <2 x i32> %src to <2 x i64>
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {
 ; CHECK-LABEL: trunc_v8i16_v8i8:
@@ -91,3 +128,13 @@ entry:
   %0 = trunc <4 x i32> %src to <4 x i8>
   ret <4 x i8> %0
 }
+
+define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) {
+; CHECK-LABEL: trunc_v2i64_v2i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = trunc <2 x i64> %src to <2 x i32>
+  ret <2 x i32> %0
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-shifts.ll b/llvm/test/CodeGen/Thumb2/mve-shifts.ll
index 4e6e5ae7f6acc..a321c2dd38338 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shifts.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shifts.ll
@@ -31,6 +31,28 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shl_qq_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s4
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    lsll r2, r1, r0
+; CHECK-NEXT:    vmov r0, s6
+; CHECK-NEXT:    vmov.32 q2[0], r2
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    vmov.32 q2[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    lsll r2, r1, r0
+; CHECK-NEXT:    vmov.32 q2[2], r2
+; CHECK-NEXT:    vmov.32 q2[3], r1
+; CHECK-NEXT:    vmov q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl <2 x i64> %src1, %src2
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: shru_qq_int8_t:
@@ -65,6 +87,30 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shru_qq_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r2, s4
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    lsll r0, r1, r2
+; CHECK-NEXT:    vmov r2, s6
+; CHECK-NEXT:    vmov.32 q2[0], r0
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmov.32 q2[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    rsbs r2, r2, #0
+; CHECK-NEXT:    lsll r0, r1, r2
+; CHECK-NEXT:    vmov.32 q2[2], r0
+; CHECK-NEXT:    vmov.32 q2[3], r1
+; CHECK-NEXT:    vmov q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr <2 x i64> %src1, %src2
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: shrs_qq_int8_t:
@@ -99,6 +145,28 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: shrs_qq_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s4
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    asrl r2, r1, r0
+; CHECK-NEXT:    vmov r0, s6
+; CHECK-NEXT:    vmov.32 q2[0], r2
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    vmov.32 q2[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    asrl r2, r1, r0
+; CHECK-NEXT:    vmov.32 q2[2], r2
+; CHECK-NEXT:    vmov.32 q2[3], r1
+; CHECK-NEXT:    vmov q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = ashr <2 x i64> %src1, %src2
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) {
 ; CHECK-LABEL: shl_qi_int8_t:
@@ -130,6 +198,26 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shl_qi_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    lsll r0, r1, #4
+; CHECK-NEXT:    vmov.32 q1[0], r0
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmov.32 q1[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    lsll r0, r1, #4
+; CHECK-NEXT:    vmov.32 q1[2], r0
+; CHECK-NEXT:    vmov.32 q1[3], r1
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl <2 x i64> %src1, <i64 4, i64 4>
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) {
 ; CHECK-LABEL: shru_qi_int8_t:
@@ -161,6 +249,26 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shru_qi_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    lsrl r0, r1, #4
+; CHECK-NEXT:    vmov.32 q1[0], r0
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmov.32 q1[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    lsrl r0, r1, #4
+; CHECK-NEXT:    vmov.32 q1[2], r0
+; CHECK-NEXT:    vmov.32 q1[3], r1
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = lshr <2 x i64> %src1, <i64 4, i64 4>
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) {
 ; CHECK-LABEL: shrs_qi_int8_t:
@@ -192,6 +300,25 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) {
+; CHECK-LABEL: shrs_qi_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    asrl r0, r1, #4
+; CHECK-NEXT:    vmov.32 q1[0], r0
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmov.32 q1[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    asrl r0, r1, #4
+; CHECK-NEXT:    vmov.32 q1[2], r0
+; CHECK-NEXT:    vmov.32 q1[3], r1
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = ashr <2 x i64> %src1, <i64 4, i64 4>
+  ret <2 x i64> %0
+}
 
 
 define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) {
@@ -230,6 +357,28 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shl_qr_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    lsll r2, r1, r0
+; CHECK-NEXT:    vmov.32 q1[0], r2
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    vmov.32 q1[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    lsll r2, r1, r0
+; CHECK-NEXT:    vmov.32 q1[2], r2
+; CHECK-NEXT:    vmov.32 q1[3], r1
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+  %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+  %0 = shl <2 x i64> %src1, %s
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) {
 ; CHECK-LABEL: shru_qr_int8_t:
@@ -273,6 +422,29 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shru_qr_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    lsll r2, r1, r0
+; CHECK-NEXT:    vmov.32 q1[0], r2
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    vmov.32 q1[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    lsll r2, r1, r0
+; CHECK-NEXT:    vmov.32 q1[2], r2
+; CHECK-NEXT:    vmov.32 q1[3], r1
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+  %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+  %0 = lshr <2 x i64> %src1, %s
+  ret <2 x i64> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) {
 ; CHECK-LABEL: shrs_qr_int8_t:
@@ -316,17 +488,38 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) {
+; CHECK-LABEL: shrs_qr_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    asrl r2, r1, r0
+; CHECK-NEXT:    vmov.32 q1[0], r2
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    vmov.32 q1[1], r1
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    asrl r2, r1, r0
+; CHECK-NEXT:    vmov.32 q1[2], r2
+; CHECK-NEXT:    vmov.32 q1[3], r1
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %i = insertelement <2 x i64> undef, i64 %src2, i32 0
+  %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer
+  %0 = ashr <2 x i64> %src1, %s
+  ret <2 x i64> %0
+}
 
 define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) {
 ; CHECK-LABEL: shl_qiv_int8_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adr r0, .LCPI27_0
+; CHECK-NEXT:    adr r0, .LCPI36_0
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
 ; CHECK-NEXT:    vshl.u8 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI27_0:
+; CHECK-NEXT:  .LCPI36_0:
 ; CHECK-NEXT:    .byte 1 @ 0x1
 ; CHECK-NEXT:    .byte 2 @ 0x2
 ; CHECK-NEXT:    .byte 3 @ 0x3
@@ -351,13 +544,13 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) {
 ; CHECK-LABEL: shl_qiv_int16_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adr r0, .LCPI28_0
+; CHECK-NEXT:    adr r0, .LCPI37_0
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
 ; CHECK-NEXT:    vshl.u16 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI28_0:
+; CHECK-NEXT:  .LCPI37_0:
 ; CHECK-NEXT:    .short 1 @ 0x1
 ; CHECK-NEXT:    .short 2 @ 0x2
 ; CHECK-NEXT:    .short 3 @ 0x3
@@ -374,13 +567,13 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) {
 ; CHECK-LABEL: shl_qiv_int32_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adr r0, .LCPI29_0
+; CHECK-NEXT:    adr r0, .LCPI38_0
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
 ; CHECK-NEXT:    vshl.u32 q0, q0, q1
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI29_0:
+; CHECK-NEXT:  .LCPI38_0:
 ; CHECK-NEXT:    .long 1 @ 0x1
 ; CHECK-NEXT:    .long 2 @ 0x2
 ; CHECK-NEXT:    .long 3 @ 0x3
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index 87d209b5d2fd9..b5c96f9f86dc3 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -262,6 +262,38 @@ entry:
   ret <16 x i8> %out
 }
 
+define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle1_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle2_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f32 s4, s2
+; CHECK-NEXT:    vmov.f32 s5, s3
+; CHECK-NEXT:    vmov.f32 s6, s0
+; CHECK-NEXT:    vmov.f32 s7, s1
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) {
+; CHECK-LABEL: shuffle3_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1>
+  ret <2 x i64> %out
+}
+
 define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {
 ; CHECK-LABEL: shuffle1_f32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -390,6 +422,38 @@ entry:
   ret <8 x half> %out
 }
 
+define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle1_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1>
+  ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle2_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f32 s4, s2
+; CHECK-NEXT:    vmov.f32 s5, s3
+; CHECK-NEXT:    vmov.f32 s6, s0
+; CHECK-NEXT:    vmov.f32 s7, s1
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) {
+; CHECK-LABEL: shuffle3_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    bx lr
+entry:
+  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1>
+  ret <2 x double> %out
+}
+
 
 define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {
 ; CHECK-LABEL: insert_i32:
@@ -421,6 +485,17 @@ entry:
   ret <16 x i8> %res
 }
 
+define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) {
+; CHECK-LABEL: insert_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.32 q0[0], r0
+; CHECK-NEXT:    vmov.32 q0[1], r1
+; CHECK-NEXT:    bx lr
+entry:
+  %res = insertelement <2 x i64> undef, i64 %a, i32 0
+  ret <2 x i64> %res
+}
+
 define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {
 ; CHECK-LABEL: insert_f32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -443,12 +518,35 @@ entry:
   ret <8 x half> %res
 }
 
+define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
+; CHECK-LABEL: insert_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r6, r7, lr}
+; CHECK-NEXT:    .setfp r7, sp, #8
+; CHECK-NEXT:    add r7, sp, #8
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    bfc r4, #0, #4
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    sub.w r4, r7, #8
+; CHECK-NEXT:    vstr d0, [sp]
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop {r4, r6, r7, pc}
+entry:
+  %res = insertelement <2 x double> undef, double %a, i32 0
+  ret <2 x double> %res
+}
+
 define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
 ; CHECK-LABEL: scalar_to_vector_i32:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .pad #8
 ; CHECK-NEXT:    sub sp, #8
-; CHECK-NEXT:    adr r1, .LCPI30_0
+; CHECK-NEXT:    adr r1, .LCPI38_0
 ; CHECK-NEXT:    vmov.u16 r0, q0[0]
 ; CHECK-NEXT:    vldrw.u32 q1, [r1]
 ; CHECK-NEXT:    vmov.32 q0[0], r0
@@ -461,7 +559,7 @@ define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI30_0:
+; CHECK-NEXT:  .LCPI38_0:
 ; CHECK-NEXT:    .zero 4
 ; CHECK-NEXT:    .long 7 @ 0x7
 ; CHECK-NEXT:    .long 1 @ 0x1
@@ -533,6 +631,28 @@ entry:
   ret i8 %res
 }
 
+define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
+; CHECK-LABEL: extract_i64_0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    bx lr
+entry:
+  %res = extractelement <2 x i64> %a, i32 0
+  ret i64 %res
+}
+
+define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
+; CHECK-LABEL: extract_i64_1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    bx lr
+entry:
+  %res = extractelement <2 x i64> %a, i32 1
+  ret i64 %res
+}
+
 define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {
 ; CHECK-LABEL: extract_f32_0:
 ; CHECK:       @ %bb.0: @ %entry
@@ -576,3 +696,25 @@ entry:
   %res = extractelement <8 x half> %a, i32 3
   ret half %res
 }
+
+define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) {
+; CHECK-LABEL: extract_f64_0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    bx lr
+entry:
+  %res = extractelement <2 x double> %a, i32 0
+  ret double %res
+}
+
+define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
+; CHECK-LABEL: extract_f64_1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f32 s0, s2
+; CHECK-NEXT:    vmov.f32 s1, s3
+; CHECK-NEXT:    bx lr
+entry:
+  %res = extractelement <2 x double> %a, i32 1
+  ret double %res
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll b/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
index 570aae3f21be4..ecad0c1c5dfd0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
@@ -32,6 +32,33 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: add_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov r2, s6
+; CHECK-NEXT:    vmov r3, s2
+; CHECK-NEXT:    vmov r0, s7
+; CHECK-NEXT:    vmov r1, s3
+; CHECK-NEXT:    adds.w lr, r3, r2
+; CHECK-NEXT:    vmov r2, s0
+; CHECK-NEXT:    vmov r3, s1
+; CHECK-NEXT:    adc.w r12, r1, r0
+; CHECK-NEXT:    vmov r0, s4
+; CHECK-NEXT:    vmov r1, s5
+; CHECK-NEXT:    adds r0, r0, r2
+; CHECK-NEXT:    adcs r1, r3
+; CHECK-NEXT:    vmov.32 q0[0], r0
+; CHECK-NEXT:    vmov.32 q0[1], r1
+; CHECK-NEXT:    vmov.32 q0[2], lr
+; CHECK-NEXT:    vmov.32 q0[3], r12
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = add nsw <2 x i64> %src1, %src2
+  ret <2 x i64> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-MVE-LABEL: add_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
@@ -122,6 +149,33 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: add_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r2, r3, d11
+; CHECK-NEXT:    bl __aeabi_dadd
+; CHECK-NEXT:    vmov lr, r12, d8
+; CHECK-NEXT:    vmov r2, r3, d10
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    bl __aeabi_dadd
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
+  ret <2 x double> %0
+}
+
 
 define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: sub_int8_t:
@@ -153,6 +207,33 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: sub_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov r2, s2
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    vmov r0, s3
+; CHECK-NEXT:    vmov r1, s7
+; CHECK-NEXT:    subs.w lr, r3, r2
+; CHECK-NEXT:    vmov r2, s4
+; CHECK-NEXT:    vmov r3, s5
+; CHECK-NEXT:    sbc.w r12, r1, r0
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    vmov r1, s1
+; CHECK-NEXT:    subs r0, r2, r0
+; CHECK-NEXT:    sbc.w r1, r3, r1
+; CHECK-NEXT:    vmov.32 q0[0], r0
+; CHECK-NEXT:    vmov.32 q0[1], r1
+; CHECK-NEXT:    vmov.32 q0[2], lr
+; CHECK-NEXT:    vmov.32 q0[3], r12
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = sub nsw <2 x i64> %src2, %src1
+  ret <2 x i64> %0
+}
+
 define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
 ; CHECK-MVE-LABEL: sub_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
@@ -243,6 +324,34 @@ entry:
   ret <8 x half> %0
 }
 
+define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: sub_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r2, r3, d11
+; CHECK-NEXT:    bl __aeabi_dsub
+; CHECK-NEXT:    vmov lr, r12, d8
+; CHECK-NEXT:    vmov r2, r3, d10
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    bl __aeabi_dsub
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
+  ret <2 x double> %0
+}
+
+
 define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: mul_int8_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -273,6 +382,35 @@ entry:
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: mul_int64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    vmov r0, s4
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    vmov r2, s5
+; CHECK-NEXT:    umull r12, r3, r1, r0
+; CHECK-NEXT:    mla lr, r1, r2, r3
+; CHECK-NEXT:    vmov r3, s6
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    vmov r2, s7
+; CHECK-NEXT:    umull r4, r5, r1, r3
+; CHECK-NEXT:    mla r1, r1, r2, r5
+; CHECK-NEXT:    vmov r2, s1
+; CHECK-NEXT:    mla r0, r2, r0, lr
+; CHECK-NEXT:    vmov r2, s3
+; CHECK-NEXT:    vmov.32 q0[0], r12
+; CHECK-NEXT:    vmov.32 q0[1], r0
+; CHECK-NEXT:    vmov.32 q0[2], r4
+; CHECK-NEXT:    mla r1, r2, r3, r1
+; CHECK-NEXT:    vmov.32 q0[3], r1
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+entry:
+  %0 = mul nsw <2 x i64> %src1, %src2
+  ret <2 x i64> %0
+}
+
 define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
 ; CHECK-MVE-LABEL: mul_float16_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
@@ -362,3 +500,31 @@ entry:
   %0 = fmul nnan ninf nsz <4 x float> %src2, %src1
   ret <4 x float> %0
 }
+
+define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: mul_float64_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    vmov q4, q1
+; CHECK-NEXT:    vmov q5, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    vmov r2, r3, d11
+; CHECK-NEXT:    bl __aeabi_dmul
+; CHECK-NEXT:    vmov lr, r12, d8
+; CHECK-NEXT:    vmov r2, r3, d10
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, lr
+; CHECK-NEXT:    mov r1, r12
+; CHECK-NEXT:    bl __aeabi_dmul
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %0 = fmul nnan ninf nsz <2 x double> %src2, %src1
+  ret <2 x double> %0
+}
+
diff --git a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
index 794f7ba20c252..37ca5a2f20208 100644
--- a/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll
@@ -50,6 +50,39 @@ entry:
   ret <4 x i32> %sum
 }
 
+define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK-FP-LABEL: vector_add_i64:
+; CHECK-FP:       @ %bb.0: @ %entry
+; CHECK-FP-NEXT:    .save {r7, lr}
+; CHECK-FP-NEXT:    push {r7, lr}
+; CHECK-FP-NEXT:    vmov d1, r2, r3
+; CHECK-FP-NEXT:    vmov d0, r0, r1
+; CHECK-FP-NEXT:    add r0, sp, #8
+; CHECK-FP-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-FP-NEXT:    vmov r1, s2
+; CHECK-FP-NEXT:    vmov r0, s3
+; CHECK-FP-NEXT:    vmov r3, s6
+; CHECK-FP-NEXT:    vmov r2, s7
+; CHECK-FP-NEXT:    adds.w lr, r1, r3
+; CHECK-FP-NEXT:    vmov r3, s0
+; CHECK-FP-NEXT:    vmov r1, s4
+; CHECK-FP-NEXT:    adc.w r12, r0, r2
+; CHECK-FP-NEXT:    vmov r2, s1
+; CHECK-FP-NEXT:    vmov r0, s5
+; CHECK-FP-NEXT:    adds r1, r1, r3
+; CHECK-FP-NEXT:    vmov.32 q0[0], r1
+; CHECK-FP-NEXT:    adcs r0, r2
+; CHECK-FP-NEXT:    vmov.32 q0[1], r0
+; CHECK-FP-NEXT:    vmov.32 q0[2], lr
+; CHECK-FP-NEXT:    vmov.32 q0[3], r12
+; CHECK-FP-NEXT:    vmov r0, r1, d0
+; CHECK-FP-NEXT:    vmov r2, r3, d1
+; CHECK-FP-NEXT:    pop {r7, pc}
+entry:
+  %sum = add <2 x i64> %lhs, %rhs
+  ret <2 x i64> %sum
+}
+
 define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
 ; CHECK-FP-LABEL: vector_add_f16:
 ; CHECK-FP:       @ %bb.0: @ %entry
@@ -81,3 +114,38 @@ entry:
   %sum = fadd <4 x float> %lhs, %rhs
   ret <4 x float> %sum
 }
+
+define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
+; CHECK-FP-LABEL: vector_add_f64:
+; CHECK-FP:       @ %bb.0: @ %entry
+; CHECK-FP-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-FP-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-FP-NEXT:    .pad #4
+; CHECK-FP-NEXT:    sub sp, #4
+; CHECK-FP-NEXT:    .vsave {d8, d9}
+; CHECK-FP-NEXT:    vpush {d8, d9}
+; CHECK-FP-NEXT:    mov r5, r0
+; CHECK-FP-NEXT:    add r0, sp, #40
+; CHECK-FP-NEXT:    vldrw.u32 q4, [r0]
+; CHECK-FP-NEXT:    mov r4, r2
+; CHECK-FP-NEXT:    mov r6, r3
+; CHECK-FP-NEXT:    mov r7, r1
+; CHECK-FP-NEXT:    vmov r2, r3, d9
+; CHECK-FP-NEXT:    mov r0, r4
+; CHECK-FP-NEXT:    mov r1, r6
+; CHECK-FP-NEXT:    bl __aeabi_dadd
+; CHECK-FP-NEXT:    vmov r2, r3, d8
+; CHECK-FP-NEXT:    vmov d9, r0, r1
+; CHECK-FP-NEXT:    mov r0, r5
+; CHECK-FP-NEXT:    mov r1, r7
+; CHECK-FP-NEXT:    bl __aeabi_dadd
+; CHECK-FP-NEXT:    vmov d8, r0, r1
+; CHECK-FP-NEXT:    vmov r2, r3, d9
+; CHECK-FP-NEXT:    vmov r0, r1, d8
+; CHECK-FP-NEXT:    vpop {d8, d9}
+; CHECK-FP-NEXT:    add sp, #4
+; CHECK-FP-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %sum = fadd <2 x double> %lhs, %rhs
+  ret <2 x double> %sum
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
index 55d354ae639c3..524ec692c8c17 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll
@@ -317,3 +317,111 @@ entry:
   %out = fptoui <8 x half> %src to <8 x i16>
   ret <8 x i16> %out
 }
+
+define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
+; CHECK-LABEL: foo_float_int64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    bl __aeabi_l2d
+; CHECK-NEXT:    vmov r2, s16
+; CHECK-NEXT:    vmov r3, s17
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl __aeabi_l2d
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = sitofp <2 x i64> %src to <2 x double>
+  ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
+; CHECK-LABEL: foo_float_uint64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vmov r1, s19
+; CHECK-NEXT:    bl __aeabi_ul2d
+; CHECK-NEXT:    vmov r2, s16
+; CHECK-NEXT:    vmov r3, s17
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl __aeabi_ul2d
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = uitofp <2 x i64> %src to <2 x double>
+  ret <2 x double> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
+; CHECK-LABEL: foo_int64_float:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d8
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    vmov r2, r3, d9
+; CHECK-NEXT:    vmov.32 q4[0], r0
+; CHECK-NEXT:    lsrl r0, r1, #32
+; CHECK-NEXT:    vmov.32 q4[1], r0
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    vmov.32 q4[2], r0
+; CHECK-NEXT:    lsrl r0, r1, #32
+; CHECK-NEXT:    vmov.32 q4[3], r0
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = fptosi <2 x double> %src to <2 x i64>
+  ret <2 x i64> %out
+}
+
+define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
+; CHECK-LABEL: foo_uint64_float:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d8
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    vmov r2, r3, d9
+; CHECK-NEXT:    vmov.32 q4[0], r0
+; CHECK-NEXT:    lsrl r0, r1, #32
+; CHECK-NEXT:    vmov.32 q4[1], r0
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    vmov.32 q4[2], r0
+; CHECK-NEXT:    lsrl r0, r1, #32
+; CHECK-NEXT:    vmov.32 q4[3], r0
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %out = fptoui <2 x double> %src to <2 x i64>
+  ret <2 x i64> %out
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
index 9629024f63e01..3cc9cfd3f442f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -35,6 +35,20 @@ entry:
   ret <16 x i8> %out
 }
 
+define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
+; CHECK-LABEL: vdup_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.32 q0[0], r0
+; CHECK-NEXT:    vmov.32 q0[1], r1
+; CHECK-NEXT:    vmov.32 q0[2], r0
+; CHECK-NEXT:    vmov.32 q0[3], r1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = insertelement <2 x i64> undef, i64 %src, i32 0
+  %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %out
+}
+
 define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
 ; CHECK-LABEL: vdup_f32_1:
 ; CHECK:       @ %bb.0: @ %entry
@@ -80,6 +94,19 @@ entry:
   ret <8 x half> %out
 }
 
+define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
+; CHECK-LABEL: vdup_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    vmov.f32 s2, s0
+; CHECK-NEXT:    vmov.f32 s3, s1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = insertelement <2 x double> undef, double %src, i32 0
+  %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
+  ret <2 x double> %out
+}
+
 
 
 define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
@@ -115,6 +142,17 @@ entry:
   ret <16 x i8> %out
 }
 
+define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
+; CHECK-LABEL: vduplane_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f32 s0, s2
+; CHECK-NEXT:    vmov.f32 s1, s3
+; CHECK-NEXT:    bx lr
+entry:
+  %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %out
+}
+
 define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
 ; CHECK-LABEL: vduplane_f32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -136,3 +174,14 @@ entry:
   %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   ret <8 x half> %out
 }
+
+define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
+; CHECK-LABEL: vduplane_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.f32 s0, s2
+; CHECK-NEXT:    vmov.f32 s1, s3
+; CHECK-NEXT:    bx lr
+entry:
+  %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x double> %out
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index 5281ecd17c315..94721a54b942e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -11,7 +11,7 @@ entry:
   ret <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
 }
 
-define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1(i8 *%dest) {
+define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1() {
 ; CHECK-LABEL: mov_int8_m1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i8 q0, #0xff
@@ -20,7 +20,7 @@ entry:
   ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
 }
 
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_1(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_1() {
 ; CHECK-LABEL: mov_int16_1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i16 q0, #0x1
@@ -29,7 +29,7 @@ entry:
   ret <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
 }
 
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1() {
 ; CHECK-LABEL: mov_int16_m1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i8 q0, #0xff
@@ -38,7 +38,7 @@ entry:
   ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 }
 
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_256(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_256() {
 ; CHECK-LABEL: mov_int16_256:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i16 q0, #0x100
@@ -56,7 +56,7 @@ entry:
   ret <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
 }
 
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_258(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_258() {
 ; CHECK-LABEL: mov_int16_258:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    adr r0, .LCPI6_0
@@ -73,7 +73,7 @@ entry:
   ret <8 x i16> <i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258, i16 258>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_1(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_1() {
 ; CHECK-LABEL: mov_int32_1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i32 q0, #0x1
@@ -82,7 +82,7 @@ entry:
   ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_256(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_256() {
 ; CHECK-LABEL: mov_int32_256:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i32 q0, #0x100
@@ -91,7 +91,7 @@ entry:
   ret <4 x i32> <i32 256, i32 256, i32 256, i32 256>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536() {
 ; CHECK-LABEL: mov_int32_65536:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i32 q0, #0x10000
@@ -100,7 +100,7 @@ entry:
   ret <4 x i32> <i32 65536, i32 65536, i32 65536, i32 65536>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216() {
 ; CHECK-LABEL: mov_int32_16777216:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i32 q0, #0x1000000
@@ -109,7 +109,7 @@ entry:
   ret <4 x i32> <i32 16777216, i32 16777216, i32 16777216, i32 16777216>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217() {
 ; CHECK-LABEL: mov_int32_16777217:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    adr r0, .LCPI11_0
@@ -126,7 +126,7 @@ entry:
   ret <4 x i32> <i32 16777217, i32 16777217, i32 16777217, i32 16777217>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919() {
 ; CHECK-LABEL: mov_int32_17919:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i32 q0, #0x45ff
@@ -135,7 +135,7 @@ entry:
   ret <4 x i32> <i32 17919, i32 17919, i32 17919, i32 17919>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519() {
 ; CHECK-LABEL: mov_int32_4587519:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i32 q0, #0x45ffff
@@ -144,7 +144,7 @@ entry:
   ret <4 x i32> <i32 4587519, i32 4587519, i32 4587519, i32 4587519>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1() {
 ; CHECK-LABEL: mov_int32_m1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i8 q0, #0xff
@@ -153,7 +153,7 @@ entry:
   ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760() {
 ; CHECK-LABEL: mov_int32_4294901760:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i32 q0, #0xffff
@@ -162,7 +162,7 @@ entry:
   ret <4 x i32> <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335() {
 ; CHECK-LABEL: mov_int32_4278190335:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    adr r0, .LCPI16_0
@@ -179,7 +179,7 @@ entry:
   ret <4 x i32> <i32 4278190335, i32 4278190335, i32 4278190335, i32 4278190335>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615() {
 ; CHECK-LABEL: mov_int32_4278255615:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i32 q0, #0xff0000
@@ -188,8 +188,8 @@ entry:
   ret <4 x i32> <i32 4278255615, i32 4278255615, i32 4278255615, i32 4278255615>
 }
 
-define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) {
-; CHECK-LABEL: mov_float_1:
+define arm_aapcs_vfpcc <2 x i64> @mov_int64_1() {
+; CHECK-LABEL: mov_int64_1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    adr r0, .LCPI18_0
 ; CHECK-NEXT:    vldrw.u32 q0, [r0]
@@ -197,6 +197,32 @@ define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
 ; CHECK-NEXT:  .LCPI18_0:
+; CHECK-NEXT:    .long 1 @ double 4.9406564584124654E-324
+; CHECK-NEXT:    .long 0
+; CHECK-NEXT:    .long 1 @ double 4.9406564584124654E-324
+; CHECK-NEXT:    .long 0
+entry:
+  ret <2 x i64> <i64 1, i64 1>
+}
+
+define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() {
+; CHECK-LABEL: mov_int64_m1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i8 q0, #0xff
+; CHECK-NEXT:    bx lr
+entry:
+  ret <2 x i64> <i64 -1, i64 -1>
+}
+
+define arm_aapcs_vfpcc <4 x float> @mov_float_1() {
+; CHECK-LABEL: mov_float_1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    adr r0, .LCPI20_0
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI20_0:
 ; CHECK-NEXT:    .long 1065353216 @ double 0.007812501848093234
 ; CHECK-NEXT:    .long 1065353216
 ; CHECK-NEXT:    .long 1065353216 @ double 0.007812501848093234
@@ -205,15 +231,15 @@ entry:
   ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 }
 
-define arm_aapcs_vfpcc <4 x float> @mov_float_m3(float *%dest) {
+define arm_aapcs_vfpcc <4 x float> @mov_float_m3() {
 ; CHECK-LABEL: mov_float_m3:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adr r0, .LCPI19_0
+; CHECK-NEXT:    adr r0, .LCPI21_0
 ; CHECK-NEXT:    vldrw.u32 q0, [r0]
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI19_0:
+; CHECK-NEXT:  .LCPI21_0:
 ; CHECK-NEXT:    .long 3225419776 @ double -32.000022917985916
 ; CHECK-NEXT:    .long 3225419776
 ; CHECK-NEXT:    .long 3225419776 @ double -32.000022917985916
@@ -222,7 +248,7 @@ entry:
   ret <4 x float> <float -3.000000e+00, float -3.000000e+00, float -3.000000e+00, float -3.000000e+00>
 }
 
-define arm_aapcs_vfpcc <8 x half> @mov_float16_1(half *%dest) {
+define arm_aapcs_vfpcc <8 x half> @mov_float16_1() {
 ; CHECK-LABEL: mov_float16_1:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i16 q0, #0x3c00
@@ -232,7 +258,7 @@ entry:
   ret <8 x half> <half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00, half 1.000000e+00>
 }
 
-define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) {
+define arm_aapcs_vfpcc <8 x half> @mov_float16_m3() {
 ; CHECK-LABEL: mov_float16_m3:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmov.i16 q0, #0xc200
@@ -241,3 +267,20 @@ define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) {
 entry:
   ret <8 x half> <half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00, half -3.000000e+00>
 }
+
+define arm_aapcs_vfpcc <2 x double> @mov_double_1() {
+; CHECK-LABEL: mov_double_1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    adr r0, .LCPI24_0
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI24_0:
+; CHECK-NEXT:    .long 0 @ double 1
+; CHECK-NEXT:    .long 1072693248
+; CHECK-NEXT:    .long 0 @ double 1
+; CHECK-NEXT:    .long 1072693248
+entry:
+  ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
index fb27d895484e0..8f6ea13befc08 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
 
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_511(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_511() {
 ; CHECK-LABEL: mov_int16_511:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i16 q0, #0xfe00
@@ -11,7 +11,7 @@ entry:
   ret <8 x i16> <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
 }
 
-define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281(i16 *%dest) {
+define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281() {
 ; CHECK-LABEL: mov_int16_65281:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i16 q0, #0xfe
@@ -20,7 +20,7 @@ entry:
   ret <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7() {
 ; CHECK-LABEL: mov_int32_m7:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i32 q0, #0x6
@@ -29,7 +29,7 @@ entry:
   ret <4 x i32> <i32 -7, i32 -7, i32 -7, i32 -7>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769() {
 ; CHECK-LABEL: mov_int32_m769:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i32 q0, #0x300
@@ -38,7 +38,7 @@ entry:
   ret <4 x i32> <i32 -769, i32 -769, i32 -769, i32 -769>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145() {
 ; CHECK-LABEL: mov_int32_m262145:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i32 q0, #0x40000
@@ -47,7 +47,7 @@ entry:
   ret <4 x i32> <i32 -262145, i32 -262145, i32 -262145, i32 -262145>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729() {
 ; CHECK-LABEL: mov_int32_m134217729:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i32 q0, #0x8000000
@@ -56,7 +56,7 @@ entry:
   ret <4 x i32> <i32 -134217729, i32 -134217729, i32 -134217729, i32 -134217729>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528() {
 ; CHECK-LABEL: mov_int32_4294902528:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vmvn.i32 q0, #0xfcff
@@ -65,7 +65,7 @@ entry:
   ret <4 x i32> <i32 4294902528, i32 4294902528, i32 4294902528, i32 4294902528>
 }
 
-define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688(i32 *%dest) {
+define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688() {
 ; CHECK-LABEL: mov_int32_4278386688:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    adr r0, .LCPI7_0

From ac6375d99d19e2c8c53dc72caa457833f6fa6641 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Mon, 15 Jul 2019 18:56:56 +0000
Subject: [PATCH 141/451] Expand comment about how StringsToBuckets was
 computed, and add more entries

The construction was explained in
https://reviews.llvm.org/D44810?id=139526#inline-391999 but reading the code
shouldn't require hunting down old reviews to understand it.

The precomputed list was missing an entry for the empty list case, and
one entry at the very end. (The current last entry is the last one where
3 * BucketCount fits in a signed int, but the reference implementation
uses unsigneds as far as I can tell, so there's room for one more entry.)

No behavior change for inputs seen in practice.

Differential Revision: https://reviews.llvm.org/D64738

llvm-svn: 366107
---
 .../PDB/Native/PDBStringTableBuilder.cpp          | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index eabb9d4466942..f7f36901e4d49 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -60,7 +60,19 @@ static uint32_t computeBucketCount(uint32_t NumStrings) {
   // strings.  Matching the reference algorithm exactly is not strictly
   // necessary for correctness, but it helps when comparing LLD's PDBs with
   // Microsoft's PDBs so as to eliminate superfluous differences.
+  // The reference implementation does (in nmt.h, NMT::grow()):
+  //   unsigned StringCount = 0;
+  //   unsigned BucketCount = 1;
+  //   fn insert() {
+  //     ++StringCount;
+  //     if (BucketCount * 3 / 4 < StringCount)
+  //       BucketCount = BucketCount * 3 / 2 + 1;
+  //   }
+  // This list contains all StringCount, BucketCount pairs where BucketCount was
+  // just incremented.  It ends before the first BucketCount entry where
+  // BucketCount * 3 would overflow a 32-bit unsigned int.
   static std::map<uint32_t, uint32_t> StringsToBuckets = {
+      {0, 1},
       {1, 2},
       {2, 4},
       {4, 7},
@@ -110,7 +122,8 @@ static uint32_t computeBucketCount(uint32_t NumStrings) {
       {229865455, 459730910},
       {344798183, 689596366},
       {517197275, 1034394550},
-      {775795913, 1551591826}};
+      {775795913, 1551591826},
+      {1163693870, 2327387740}};
   auto Entry = StringsToBuckets.lower_bound(NumStrings);
   assert(Entry != StringsToBuckets.end());
   return Entry->second;

From 42e90ed7717e09a1e0fb21027e8346707b60c896 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@alumni.harvard.edu>
Date: Mon, 15 Jul 2019 19:04:04 +0000
Subject: [PATCH 142/451] [cmake] Don't set install rules for tblgen if
 building utils is disabled

Summary:
This is a follow-up to D64032. After that change, if building utils is
disabled and cross compilation is attempted, CMake will complain that adding
`install()` directives to targets with EXCLUDE_FROM_ALL set is "undefined".
Indeed, it appears that, depending on the CMake version and the selected
generator, the install rule will error because the underlying target isn't
built. Fix that by not adding the install rule if building utils is not
requested. Note that this doesn't prevent building tblgen as a
dependency in non-cross builds, even if building tools is disabled.

Reviewed By: smeenai
Differential Revision: https://reviews.llvm.org/D64225

llvm-svn: 366108
---
 llvm/cmake/modules/TableGen.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index f15a23b624555..36c026b5c0f2b 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -147,7 +147,7 @@ macro(add_tablegen target project)
     endif()
   endif()
 
-  if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
+  if (${project} STREQUAL LLVM AND NOT LLVM_INSTALL_TOOLCHAIN_ONLY AND LLVM_BUILD_UTILS)
     set(export_to_llvmexports)
     if(${target} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR
         NOT LLVM_DISTRIBUTION_COMPONENTS)

From 99f2a108707ebf015571101b113fc03b30ad79b9 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomasp@graphcore.ai>
Date: Mon, 15 Jul 2019 19:04:56 +0000
Subject: [PATCH 143/451] [FileCheck] Store line numbers as optional values

Summary:
Processing of command-line variable definitions and the logic around
implicit CHECK-NOT directives both reuse parsing code that expects a line
number to be defined. Until now, a special line number of 0 has been used
for those users of the parsing code for which a line number does not make
sense. This commit instead represents line numbers as Optional values so
that they can be None in those cases.

Reviewers: jhenderson, chandlerc, jdenny, probinson, grimar, arichardson, rnk

Subscribers: JonChesterfield, rogfer01, hfinkel, kristina, rnk, tra, arichardson, grimar, dblaikie, probinson, llvm-commits, hiraditya

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64639

llvm-svn: 366109
---
 llvm/include/llvm/Support/FileCheck.h    | 46 ++++++++++++------------
 llvm/lib/Support/FileCheck.cpp           | 22 ++++++------
 llvm/unittests/Support/FileCheckTest.cpp | 14 +++++---
 3 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/Support/FileCheck.h
index caff50b0ca466..0cd25a71a3b33 100644
--- a/llvm/include/llvm/Support/FileCheck.h
+++ b/llvm/include/llvm/Support/FileCheck.h
@@ -97,20 +97,18 @@ class FileCheckNumericVariable {
   /// Value of numeric variable, if defined, or None otherwise.
   Optional<uint64_t> Value;
 
-  /// Line number where this variable is defined. Used to determine whether a
-  /// variable is defined on the same line as a given use.
-  size_t DefLineNumber;
+  /// Line number where this variable is defined, or None if defined before
+  /// input is parsed. Used to determine whether a variable is defined on the
+  /// same line as a given use.
+  Optional<size_t> DefLineNumber;
 
 public:
-  /// Constructor for a variable \p Name defined at line \p DefLineNumber.
-  FileCheckNumericVariable(size_t DefLineNumber, StringRef Name)
+  /// Constructor for a variable \p Name defined at line \p DefLineNumber or
+  /// defined before input is parsed if DefLineNumber is None.
+  FileCheckNumericVariable(StringRef Name,
+                           Optional<size_t> DefLineNumber = None)
       : Name(Name), DefLineNumber(DefLineNumber) {}
 
-  /// Constructor for numeric variable \p Name with a known \p Value at parse
-  /// time (e.g. the @LINE numeric variable).
-  FileCheckNumericVariable(StringRef Name, uint64_t Value)
-      : Name(Name), Value(Value), DefLineNumber(0) {}
-
   /// \returns name of this numeric variable.
   StringRef getName() const { return Name; }
 
@@ -125,8 +123,9 @@ class FileCheckNumericVariable {
   /// currently defined or not.
   void clearValue();
 
-  /// \returns the line number where this variable is defined.
-  size_t getDefLineNumber() { return DefLineNumber; }
+  /// \returns the line number where this variable is defined, if any, or None
+  /// if defined before input is parsed.
+  Optional<size_t> getDefLineNumber() { return DefLineNumber; }
 };
 
 /// Class representing the use of a numeric variable in the AST of an
@@ -476,13 +475,14 @@ class FileCheckPattern {
 
   Check::FileCheckType CheckTy;
 
-  /// Line number for this CHECK pattern. Used to determine whether a variable
-  /// definition is made on an earlier line to the one with this CHECK.
-  size_t LineNumber;
+  /// Line number for this CHECK pattern or None if it is an implicit pattern.
+  /// Used to determine whether a variable definition is made on an earlier
+  /// line to the one with this CHECK.
+  Optional<size_t> LineNumber;
 
 public:
   FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
-                   size_t Line)
+                   Optional<size_t> Line = None)
       : Context(Context), CheckTy(Ty), LineNumber(Line) {}
 
   /// \returns the location in source code.
@@ -509,13 +509,13 @@ class FileCheckPattern {
   static Expected<VariableProperties> parseVariable(StringRef &Str,
                                                     const SourceMgr &SM);
   /// Parses \p Expr for the name of a numeric variable to be defined at line
-  /// \p LineNumber. \returns a pointer to the class instance representing that
-  /// variable, creating it if needed, or an error holding a diagnostic against
-  /// \p SM should defining such a variable be invalid.
-  static Expected<FileCheckNumericVariable *>
-  parseNumericVariableDefinition(StringRef &Expr,
-                                 FileCheckPatternContext *Context,
-                                 size_t LineNumber, const SourceMgr &SM);
+  /// \p LineNumber or before input is parsed if \p LineNumber is None.
+  /// \returns a pointer to the class instance representing that variable,
+  /// creating it if needed, or an error holding a diagnostic against \p SM
+  /// should defining such a variable be invalid.
+  static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition(
+      StringRef &Expr, FileCheckPatternContext *Context,
+      Optional<size_t> LineNumber, const SourceMgr &SM);
   /// Parses \p Expr for a numeric substitution block. Parameter
   /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
   /// expression. \returns a pointer to the class instance representing the AST
diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp
index 9fb4d798849d7..e0f17787bdf8c 100644
--- a/llvm/lib/Support/FileCheck.cpp
+++ b/llvm/lib/Support/FileCheck.cpp
@@ -124,8 +124,8 @@ char FileCheckNotFoundError::ID = 0;
 
 Expected<FileCheckNumericVariable *>
 FileCheckPattern::parseNumericVariableDefinition(
-    StringRef &Expr, FileCheckPatternContext *Context, size_t LineNumber,
-    const SourceMgr &SM) {
+    StringRef &Expr, FileCheckPatternContext *Context,
+    Optional<size_t> LineNumber, const SourceMgr &SM) {
   Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
   if (!ParseVarResult)
     return ParseVarResult.takeError();
@@ -152,7 +152,7 @@ FileCheckPattern::parseNumericVariableDefinition(
   if (VarTableIter != Context->GlobalNumericVariableTable.end())
     DefinedNumericVariable = VarTableIter->second;
   else
-    DefinedNumericVariable = Context->makeNumericVariable(LineNumber, Name);
+    DefinedNumericVariable = Context->makeNumericVariable(Name, LineNumber);
 
   return DefinedNumericVariable;
 }
@@ -177,11 +177,12 @@ FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo,
   if (VarTableIter != Context->GlobalNumericVariableTable.end())
     NumericVariable = VarTableIter->second;
   else {
-    NumericVariable = Context->makeNumericVariable(0, Name);
+    NumericVariable = Context->makeNumericVariable(Name);
     Context->GlobalNumericVariableTable[Name] = NumericVariable;
   }
 
-  if (!IsPseudo && NumericVariable->getDefLineNumber() == LineNumber)
+  Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
+  if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
     return FileCheckErrorDiagnostic::get(
         SM, Name,
         "numeric variable '" + Name + "' defined on the same line as used");
@@ -620,7 +621,8 @@ Expected<size_t> FileCheckPattern::match(StringRef Buffer, size_t &MatchLen,
   std::string TmpStr;
   if (!Substitutions.empty()) {
     TmpStr = RegExStr;
-    Context->LineVariable->setValue(LineNumber);
+    if (LineNumber)
+      Context->LineVariable->setValue(*LineNumber);
 
     size_t InsertOffset = 0;
     // Substitute all string variables and expressions whose values are only
@@ -1102,7 +1104,7 @@ FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
 void FileCheckPatternContext::createLineVariable() {
   assert(!LineVariable && "@LINE pseudo numeric variable already created");
   StringRef LineName = "@LINE";
-  LineVariable = makeNumericVariable(0, LineName);
+  LineVariable = makeNumericVariable(LineName);
   GlobalNumericVariableTable[LineName] = LineVariable;
 }
 
@@ -1131,7 +1133,7 @@ bool FileCheck::ReadCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
 
     ImplicitNegativeChecks.push_back(
-        FileCheckPattern(Check::CheckNot, &PatternContext, 0));
+        FileCheckPattern(Check::CheckNot, &PatternContext));
     ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
                                                "IMPLICIT-CHECK", SM, Req);
   }
@@ -1790,8 +1792,8 @@ Error FileCheckPatternContext::defineCmdlineVariables(
     if (CmdlineDef[0] == '#') {
       StringRef CmdlineName = CmdlineDef.substr(1, EqIdx - 1);
       Expected<FileCheckNumericVariable *> ParseResult =
-          FileCheckPattern::parseNumericVariableDefinition(CmdlineName, this, 0,
-                                                           SM);
+          FileCheckPattern::parseNumericVariableDefinition(CmdlineName, this,
+                                                           None, SM);
       if (!ParseResult) {
         Errs = joinErrors(std::move(Errs), ParseResult.takeError());
         continue;
diff --git a/llvm/unittests/Support/FileCheckTest.cpp b/llvm/unittests/Support/FileCheckTest.cpp
index 8df4603dbd118..2275d7229997a 100644
--- a/llvm/unittests/Support/FileCheckTest.cpp
+++ b/llvm/unittests/Support/FileCheckTest.cpp
@@ -56,7 +56,7 @@ static void expectUndefError(const Twine &ExpectedUndefVarName, Error Err) {
 TEST_F(FileCheckTest, NumericVariable) {
   // Undefined variable: getValue and eval fail, error returned by eval holds
   // the name of the undefined variable and setValue does not trigger assert.
-  FileCheckNumericVariable FooVar = FileCheckNumericVariable(1, "FOO");
+  FileCheckNumericVariable FooVar = FileCheckNumericVariable("FOO", 1);
   EXPECT_EQ("FOO", FooVar.getName());
   FileCheckNumericVariableUse FooVarUse =
       FileCheckNumericVariableUse("FOO", &FooVar);
@@ -87,10 +87,12 @@ TEST_F(FileCheckTest, NumericVariable) {
 uint64_t doAdd(uint64_t OpL, uint64_t OpR) { return OpL + OpR; }
 
 TEST_F(FileCheckTest, Binop) {
-  FileCheckNumericVariable FooVar = FileCheckNumericVariable("FOO", 42);
+  FileCheckNumericVariable FooVar = FileCheckNumericVariable("FOO");
+  FooVar.setValue(42);
   std::unique_ptr<FileCheckNumericVariableUse> FooVarUse =
       llvm::make_unique<FileCheckNumericVariableUse>("FOO", &FooVar);
-  FileCheckNumericVariable BarVar = FileCheckNumericVariable("BAR", 18);
+  FileCheckNumericVariable BarVar = FileCheckNumericVariable("BAR");
+  BarVar.setValue(18);
   std::unique_ptr<FileCheckNumericVariableUse> BarVarUse =
       llvm::make_unique<FileCheckNumericVariableUse>("BAR", &BarVar);
   FileCheckASTBinop Binop =
@@ -407,8 +409,10 @@ TEST_F(FileCheckTest, Substitution) {
 
   // Substitutions of defined pseudo and non-pseudo numeric variables return
   // the right value.
-  FileCheckNumericVariable LineVar = FileCheckNumericVariable("@LINE", 42);
-  FileCheckNumericVariable NVar = FileCheckNumericVariable("N", 10);
+  FileCheckNumericVariable LineVar = FileCheckNumericVariable("@LINE");
+  LineVar.setValue(42);
+  FileCheckNumericVariable NVar = FileCheckNumericVariable("N");
+  NVar.setValue(10);
   auto LineVarUse =
       llvm::make_unique<FileCheckNumericVariableUse>("@LINE", &LineVar);
   auto NVarUse = llvm::make_unique<FileCheckNumericVariableUse>("N", &NVar);

From ba4373ea7d9c816232d44061230f5dca9582b4b7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:12:00 +0000
Subject: [PATCH 144/451] AMDGPU: Fix missing immarg from interp intrinsics

llvm-svn: 366110
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      | 10 +-
 llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll | 96 +++++++++++++++++++
 2 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index e92a6078ce479..1cde3afd69e1d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1191,7 +1191,7 @@ def int_amdgcn_interp_mov :
   GCCBuiltin<"__builtin_amdgcn_interp_mov">,
   Intrinsic<[llvm_float_ty],
             [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
+            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;
 
 // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
 // This intrinsic reads from lds, but the memory values are constant,
@@ -1200,14 +1200,14 @@ def int_amdgcn_interp_p1 :
   GCCBuiltin<"__builtin_amdgcn_interp_p1">,
   Intrinsic<[llvm_float_ty],
             [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
+            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>]>;
 
 // __builtin_amdgcn_interp_p2 <p1>, <j>, <attr_chan>, <attr>, <m0>
 def int_amdgcn_interp_p2 :
   GCCBuiltin<"__builtin_amdgcn_interp_p2">,
   Intrinsic<[llvm_float_ty],
             [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
+            [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>]>;
           // See int_amdgcn_v_interp_p1 for why this is IntrNoMem.
 
 // __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0>
@@ -1215,14 +1215,14 @@ def int_amdgcn_interp_p1_f16 :
   GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">,
   Intrinsic<[llvm_float_ty],
             [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
+            [IntrNoMem, IntrSpeculatable, ImmArg<1>, ImmArg<2>, ImmArg<3>]>;
 
 // __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0>
 def int_amdgcn_interp_p2_f16 :
   GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">,
   Intrinsic<[llvm_half_ty],
             [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
+            [IntrNoMem, IntrSpeculatable, ImmArg<2>, ImmArg<3>, ImmArg<4>]>;
 
 // Pixel shaders only: whether the current pixel is live (i.e. not a helper
 // invocation for derivative computation).
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll
index 9b8704b761493..a72d1da68a212 100644
--- a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll
@@ -578,3 +578,99 @@ define i32 @test_permlanex16(i32 addrspace(1)* %out, i32 %arg0, i32 %arg1, i32 %
   %v2 = call i32 @llvm.amdgcn.permlanex16(i32 %v2, i32 %arg0, i32 %arg1, i32 %arg2, i1 false, i1 %arg4)
   ret i32 %v2
 }
+
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32)
+define void @test_interp_p1(float %arg0, i32 %arg1, i32 %arg2, i32 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg1
+  ; CHECK-NEXT: %val0 = call float @llvm.amdgcn.interp.p1(float %arg0, i32 %arg1, i32 0, i32 0)
+  %val0 = call float @llvm.amdgcn.interp.p1(float %arg0, i32 %arg1, i32 0, i32 0)
+  store volatile float %val0, float addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %val1 = call float @llvm.amdgcn.interp.p1(float %arg0, i32 0, i32 %arg2, i32 0)
+  %val1 = call float @llvm.amdgcn.interp.p1(float %arg0, i32 0, i32 %arg2, i32 0)
+  store volatile float %val1, float addrspace(1)* undef
+  ret void
+}
+
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32)
+define void @test_interp_p2(float %arg0, float %arg1, i32 %arg2, i32 %arg3, i32 %arg4) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %val0 = call float @llvm.amdgcn.interp.p2(float %arg0, float %arg1, i32 %arg2, i32 0, i32 0)
+
+  %val0 = call float @llvm.amdgcn.interp.p2(float %arg0, float %arg1, i32 %arg2, i32 0, i32 0)
+  store volatile float %val0, float addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg3
+  ; CHECK-NEXT: %val1 = call float @llvm.amdgcn.interp.p2(float %arg0, float %arg1, i32 0, i32 %arg3, i32 0)
+  %val1 = call float @llvm.amdgcn.interp.p2(float %arg0, float %arg1, i32 0, i32 %arg3, i32 0)
+  store volatile float %val1, float addrspace(1)* undef
+  ret void
+}
+
+declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32)
+define void @test_interp_mov(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg1
+  ; CHECK-NEXT: %val0 = call float @llvm.amdgcn.interp.mov(i32 %arg0, i32 %arg1, i32 0, i32 0)
+  %val0 = call float @llvm.amdgcn.interp.mov(i32 %arg0, i32 %arg1, i32 0, i32 0)
+  store volatile float %val0, float addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %val1 = call float @llvm.amdgcn.interp.mov(i32 %arg0, i32 0, i32 %arg2, i32 0)
+  %val1 = call float @llvm.amdgcn.interp.mov(i32 %arg0, i32 0, i32 %arg2, i32 0)
+  store volatile float %val1, float addrspace(1)* undef
+
+  ret void
+}
+
+declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
+define void @test_interp_p1_f16(float %arg0, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg1
+  ; CHECK-NEXT:%val0 = call float @llvm.amdgcn.interp.p1.f16(float %arg0, i32 %arg1, i32 2, i1 false, i32 %arg4)
+  %val0 = call float @llvm.amdgcn.interp.p1.f16(float %arg0, i32 %arg1, i32 2, i1 0, i32 %arg4)
+  store volatile float %val0, float addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT:i32 %arg2
+  ; CHECK-NEXT:  %val1 = call float @llvm.amdgcn.interp.p1.f16(float %arg0, i32 0, i32 %arg2, i1 false, i32 %arg4)
+  %val1 = call float @llvm.amdgcn.interp.p1.f16(float %arg0, i32 0, i32 %arg2, i1 0, i32 %arg4)
+  store volatile float %val1, float addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT:i1 %arg3
+  ; CHECK-NEXT:  %val2 = call float @llvm.amdgcn.interp.p1.f16(float %arg0, i32 0, i32 0, i1 %arg3, i32 %arg4)
+  %val2 = call float @llvm.amdgcn.interp.p1.f16(float %arg0, i32 0, i32 0, i1 %arg3, i32 %arg4)
+  store volatile float %val2, float addrspace(1)* undef
+
+  ret void
+}
+
+declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32)
+define void @test_interp_p2_f16(float %arg0, float %arg1, i32 %arg2, i32 %arg3, i1 %arg4, i32 %arg5) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg2
+  ; CHECK-NEXT: %val0 = call half @llvm.amdgcn.interp.p2.f16(float %arg0, float %arg1, i32 %arg2, i32 2, i1 false, i32 %arg5)
+  %val0 = call half @llvm.amdgcn.interp.p2.f16(float %arg0, float %arg1, i32 %arg2, i32 2, i1 false, i32 %arg5)
+  store volatile half %val0, half addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg3
+  ; CHECK-NEXT: %val1 = call half @llvm.amdgcn.interp.p2.f16(float %arg0, float %arg1, i32 0, i32 %arg3, i1 false, i32 %arg5)
+  %val1 = call half @llvm.amdgcn.interp.p2.f16(float %arg0, float %arg1, i32 0, i32 %arg3, i1 false, i32 %arg5)
+  store volatile half %val1, half addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i1 %arg4
+  ; CHECK-NEXT: %val2 = call half @llvm.amdgcn.interp.p2.f16(float %arg0, float %arg1, i32 0, i32 0, i1 %arg4, i32 %arg5)
+  %val2 = call half @llvm.amdgcn.interp.p2.f16(float %arg0, float %arg1, i32 0, i32 0, i1 %arg4, i32 %arg5)
+  store volatile half %val2, half addrspace(1)* undef
+
+  ret void
+}

From 434d664095b2417453c9e86943c9a94ae1d4bd47 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:37:34 +0000
Subject: [PATCH 145/451] GlobalISel: Implement narrowScalar for vector
 extract/insert indexes

llvm-svn: 366113
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 11 +++++
 .../legalize-extract-vector-elt.mir           | 19 ++++++++
 .../GlobalISel/legalize-insert-vector-elt.mir | 46 ++++++++++++++++++-
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d13b1a53f61de..bf3cca4115a13 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -736,6 +736,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+  case TargetOpcode::G_INSERT_VECTOR_ELT: {
+    if (TypeIdx != 2)
+      return UnableToLegalize;
+
+    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
+    Observer.changingInstr(MI);
+    narrowScalarSrc(MI, NarrowTy, OpIdx);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   }
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
index 2fdde786b14dc..682eafb2ea4b1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -646,3 +646,22 @@ body: |
     %2:_(s128) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
 ...
+
+---
+name: extract_vector_elt_v2i32_varidx_i64
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: extract_vector_elt_v2i32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[TRUNC]](s32)
+    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index d55f67262c51e..2ca336febe9b2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -2,12 +2,12 @@
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
 
 ---
-name: insert_vector_elt_0_v2i32
+name: insert_vector_elt_0_v2s32
 
 body: |
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2
-    ; CHECK-LABEL: name: insert_vector_elt_0_v2i32
+    ; CHECK-LABEL: name: insert_vector_elt_0_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -19,3 +19,45 @@ body: |
     %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
     $vgpr0_vgpr1 = COPY %3
 ...
+
+---
+name: insert_vector_elt_v2s32_varidx_i64
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4
+
+    ; CHECK-LABEL: name: insert_vector_elt_v2s32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s64) = COPY $vgpr3_vgpr4
+    %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: insert_vector_elt_v16s32_varidx_i64
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16, $vgpr17_vgpr18
+
+    ; CHECK-LABEL: name: insert_vector_elt_v16s32_varidx_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr16
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr17_vgpr18
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<16 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32)
+    ; CHECK: S_ENDPGM 0, implicit [[IVEC]](<16 x s32>)
+    %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+    %1:_(s32) = COPY $vgpr16
+    %2:_(s64) = COPY $vgpr17_vgpr18
+    %3:_(<16 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    S_ENDPGM 0, implicit %3
+...

From 5dfd4660329054f492583939ba69335a352b89dc Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:39:31 +0000
Subject: [PATCH 146/451] AMDGPU/GlobalISel: Fix G_ICMP for wave32

llvm-svn: 366114
---
 .../lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp |  4 ++--
 .../CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir  | 13 +++++++------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 317a9b5c08ccb..5a73b0d113981 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -583,7 +583,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  DebugLoc DL = I.getDebugLoc();
+  const DebugLoc &DL = I.getDebugLoc();
 
   unsigned SrcReg = I.getOperand(2).getReg();
   unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
@@ -616,7 +616,7 @@ bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
             .add(I.getOperand(2))
             .add(I.getOperand(3));
   RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
-                               AMDGPU::SReg_64RegClass, MRI);
+                               *TRI.getBoolRC(), MRI);
   bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
   I.eraseFromParent();
   return Ret;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir
index 04769922fdc22..d2fe30a084526 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-icmp.mir
@@ -1,5 +1,6 @@
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_64_xexec %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_64_xexec %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN -DVCCRC=sreg_32_xm0_xexec %s
 
 ---
 name:            icmp_s32_s_mix
@@ -129,8 +130,8 @@ regBankSelected: true
 # GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
 # GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
 # GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
-# GCN: [[COND0:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
-# GCN: [[COND1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR6]], [[VGPR7]]
+# GCN: [[COND0:%[0-9]+]]:[[VCCRC]] = V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
+# GCN: [[COND1:%[0-9]+]]:[[VCCRC]] = V_CMP_NE_U32_e64 [[VGPR6]], [[VGPR7]]
 # GCN: V_CNDMASK_B32_e64 0, [[VGPR9]], 0, [[VGPR8]], [[COND0]]
 # GCN: V_CNDMASK_B32_e64 0, [[VGPR5]], 0, [[VGPR4]], [[COND1]]
 
@@ -339,8 +340,8 @@ legalized:       true
 regBankSelected: true
 
 # GCN-LABEL: name: icmp_eq_ne_p3_vv
-# GCN: %2:sreg_64_xexec = V_CMP_EQ_U32_e64 %0, %1, implicit $exec
-# GCN: %3:sreg_64_xexec = V_CMP_NE_U32_e64 %0, %1, implicit $exec
+# GCN: %2:[[VCCRC]] = V_CMP_EQ_U32_e64 %0, %1, implicit $exec
+# GCN: %3:[[VCCRC]] = V_CMP_NE_U32_e64 %0, %1, implicit $exec
 
 body: |
   bb.0:

From b0e04c018c37753f1eb3c8e84c4711cc78b41934 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:40:59 +0000
Subject: [PATCH 147/451] AMDGPU/GlobalISel: Custom legalize
 G_EXTRACT_VECTOR_ELT

Turn the constant cases into G_EXTRACTs.

llvm-svn: 366115
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  35 ++-
 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h  |   2 +
 .../legalize-extract-vector-elt.mir           | 201 +++++++++---------
 3 files changed, 135 insertions(+), 103 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 090208e4c309c..9bee38f1424f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -633,7 +633,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     unsigned IdxTypeIdx = 2;
 
     getActionDefinitionsBuilder(Op)
-      .legalIf([=](const LegalityQuery &Query) {
+      .customIf([=](const LegalityQuery &Query) {
           const LLT EltTy = Query.Types[EltTypeIdx];
           const LLT VecTy = Query.Types[VecTypeIdx];
           const LLT IdxTy = Query.Types[IdxTypeIdx];
@@ -790,6 +790,10 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
   case TargetOpcode::G_FMINNUM_IEEE:
   case TargetOpcode::G_FMAXNUM_IEEE:
     return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+    return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
+  case TargetOpcode::G_INSERT_VECTOR_ELT:
+    return true; // TODO
   default:
     return false;
   }
@@ -1121,6 +1125,35 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
   return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
 }
 
+bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  // TODO: Should move some of this into LegalizerHelper.
+
+  // TODO: Promote dynamic indexing of s16 to s32
+  // TODO: Dynamic s64 indexing is only legal for SGPR.
+  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
+  if (!IdxVal) // Dynamic case will be selected to register indexing.
+    return true;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Vec = MI.getOperand(1).getReg();
+
+  LLT VecTy = MRI.getType(Vec);
+  LLT EltTy = VecTy.getElementType();
+  assert(EltTy == MRI.getType(Dst));
+
+  B.setInstr(MI);
+
+  if (IdxVal.getValue() < VecTy.getNumElements())
+    B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
+  else
+    B.buildUndef(Dst);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 // Return the use branch instruction, otherwise null if the usage is invalid.
 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                        MachineRegisterInfo &MRI) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index f5a6ff7d96c72..b858342494d3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -51,6 +51,8 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
                      MachineIRBuilder &MIRBuilder, bool Signed) const;
   bool legalizeMinNumMaxNum(MachineInstr &MI, MachineRegisterInfo &MRI,
                             MachineIRBuilder &MIRBuilder) const;
+  bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                MachineIRBuilder &MIRBuilder) const;
 
   Register getLiveInRegister(MachineRegisterInfo &MRI,
                              Register Reg, LLT Ty) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
index 682eafb2ea4b1..ba8472648af06 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -9,15 +9,44 @@ body: |
     liveins: $vgpr0_vgpr1
     ; CHECK-LABEL: name: extract_vector_elt_0_v2i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_CONSTANT i32 0
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
     $vgpr0 = COPY %2
 ...
 ---
+name: extract_vector_elt_1_v2i32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: extract_vector_elt_1_v2i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+---
+name: extract_vector_elt_2_v2i32
+# Index 2 is out of bounds for <2 x s32>, so the result is expected to be undef.
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: extract_vector_elt_2_v2i32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: $vgpr0 = COPY [[DEF]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_CONSTANT i32 2
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+    $vgpr0 = COPY %2
+...
+---
 name: extract_vector_elt_0_v3i32
 
 body: |
@@ -25,9 +54,8 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2
     ; CHECK-LABEL: name: extract_vector_elt_0_v3i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<3 x s32>), [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_CONSTANT i32 0
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -41,9 +69,8 @@ body: |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-LABEL: name: extract_vector_elt_0_v4i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
+    ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = G_CONSTANT i32 0
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -58,10 +85,8 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: extract_vector_elt_0_v5i32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<5 x s32>), [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(<5 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0
     %2:_(s32) = G_CONSTANT i32 0
@@ -77,10 +102,8 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: extract_vector_elt_0_v6i32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<6 x s32>), [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(<6 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0
     %2:_(s32) = G_CONSTANT i32 0
@@ -96,10 +119,8 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: extract_vector_elt_0_v7i32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<7 x s32>), [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(<7 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0
     %2:_(s32) = G_CONSTANT i32 0
@@ -115,10 +136,8 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: extract_vector_elt_0_v8i32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<8 x s32>), [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(<8 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0, %0
     %2:_(s32) = G_CONSTANT i32 0
@@ -134,10 +153,8 @@ body: |
     liveins: $vgpr0
     ; CHECK-LABEL: name: extract_vector_elt_0_v16i32
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<16 x s32>), [[C]](s32)
-    ; CHECK: $vgpr0 = COPY [[EVEC]](s32)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(<16 x s32>) = G_BUILD_VECTOR %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0, %0
     %2:_(s32) = G_CONSTANT i32 0
@@ -188,17 +205,16 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32
     ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>)
     ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[C]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
     ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(<2 x s8>) = G_IMPLICIT_DEF
     %1:_(s32) = G_CONSTANT i32 0
@@ -215,9 +231,8 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_0_v2i16_i32
     ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](<2 x s16>), [[C]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[DEF]](<2 x s16>), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
     ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<2 x s16>) = G_IMPLICIT_DEF
     %1:_(s32) = G_CONSTANT i32 0
@@ -234,17 +249,16 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32
     ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>)
     ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[C]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
     ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(<2 x s1>) = G_IMPLICIT_DEF
     %1:_(s32) = G_CONSTANT i32 0
@@ -403,9 +417,8 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx0_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[C]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<2 x s16>), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
     ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s32) = G_CONSTANT i32 0
@@ -423,9 +436,8 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx1_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[C]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[COPY]](<2 x s16>), 16
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16)
     ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s32) = G_CONSTANT i32 1
@@ -443,10 +455,9 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_v2s16_idx2_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s16>), [[C]](s32)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s16)
-    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(<2 x s16>) = COPY $vgpr0
     %1:_(s32) = G_CONSTANT i32 2
     %2:_(s16) = G_EXTRACT_VECTOR_ELT %0, %1
@@ -494,19 +505,18 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx0_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
     ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C1]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
-    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C1]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
     ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_CONSTANT i32 0
@@ -525,19 +535,18 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx1_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
     ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C1]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
-    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C1]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 32
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
     ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_CONSTANT i32 1
@@ -556,19 +565,18 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx2_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
     ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C1]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
-    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C1]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
     ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_CONSTANT i32 2
@@ -587,20 +595,9 @@ body: |
 
     ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx3_i32
     ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY [[COPY]](<3 x s32>)
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C1]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[C1]](s32)
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[UV2]], [[C1]](s32)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C1]](s32)
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C1]](s32)
-    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C1]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ASHR]](s32), [[ASHR1]](s32), [[ASHR2]](s32)
-    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY2]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY1]](s32)
     %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
     %1:_(s32) = G_CONSTANT i32 3
     %2:_(<3 x s16>) = G_TRUNC %0

From 6ed315f89be1b5e692b1d8e2f5ddda08d5b4e47d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:43:04 +0000
Subject: [PATCH 148/451] AMDGPU/GlobalISel: Custom legalize
 G_INSERT_VECTOR_ELT

llvm-svn: 366116
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 32 ++++++++++++++-
 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h  |  2 +
 .../GlobalISel/legalize-insert-vector-elt.mir | 41 +++++++++++++++++--
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 9bee38f1424f5..3d1f7f404c918 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -793,7 +793,7 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
     return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
   case TargetOpcode::G_INSERT_VECTOR_ELT:
-    return true; // TODO
+    return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
   default:
     return false;
   }
@@ -1154,6 +1154,36 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
   return true;
 }
 
+bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &B) const {
+  // TODO: Should move some of this into LegalizerHelper.
+  // Lowers a constant-index insert to G_INSERT, or to undef if out of range.
+  // TODO: Promote dynamic indexing of s16 to s32
+  // TODO: Dynamic s64 indexing is only legal for SGPR.
+  Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
+  if (!IdxVal) // Dynamic case will be selected to register indexing.
+    return true;
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Vec = MI.getOperand(1).getReg();
+  Register Ins = MI.getOperand(2).getReg();
+
+  LLT VecTy = MRI.getType(Vec);
+  LLT EltTy = VecTy.getElementType();
+  assert(EltTy == MRI.getType(Ins));
+
+  B.setInstr(MI);
+  // IdxVal is signed; compare as unsigned so a negative index is rejected too.
+  if (static_cast<uint64_t>(IdxVal.getValue()) < VecTy.getNumElements())
+    B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
+  else
+    B.buildUndef(Dst);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 // Return the use branch instruction, otherwise null if the usage is invalid.
 static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
                                        MachineRegisterInfo &MRI) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index b858342494d3a..3f1cc1d265dd7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -53,6 +53,8 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
                             MachineIRBuilder &MIRBuilder) const;
   bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &MIRBuilder) const;
+  bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
+                               MachineIRBuilder &MIRBuilder) const;
 
   Register getLiveInRegister(MachineRegisterInfo &MRI,
                              Register Reg, LLT Ty) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index 2ca336febe9b2..75759b3e7548a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -10,9 +10,8 @@ body: |
     ; CHECK-LABEL: name: insert_vector_elt_0_v2s32
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32)
-    ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
     %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = G_CONSTANT i32 0
@@ -20,6 +19,42 @@ body: |
     $vgpr0_vgpr1 = COPY %3
 ...
 
+---
+name: insert_vector_elt_1_v2s32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-LABEL: name: insert_vector_elt_1_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32
+    ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
+---
+name: insert_vector_elt_2_v2s32
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; CHECK-LABEL: name: insert_vector_elt_2_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32) = COPY $vgpr2
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+    $vgpr0_vgpr1 = COPY %3
+...
+
 ---
 name: insert_vector_elt_v2s32_varidx_i64
 

From 18b7133843bdfdfbad0f981c8cce944426e8411c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:44:07 +0000
Subject: [PATCH 149/451] AMDGPU/GlobalISel: Fix handling of sgpr (not scc
 bank) s1 to VCC

This was emitting a copy from a 32-bit register to a 64-bit.

llvm-svn: 366117
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 40 ++++++++++-------
 .../AMDGPU/GlobalISel/inst-select-copy.mir    | 45 +++++++++++++++----
 2 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5a73b0d113981..0d02e738b4ba5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -86,8 +86,9 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg,
   const TargetRegisterClass *RC =
       RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
   if (RC) {
+    const LLT Ty = MRI.getType(Reg);
     return RC->hasSuperClassEq(TRI.getBoolRC()) &&
-           MRI.getType(Reg).getSizeInBits() == 1;
+           Ty.isValid() && Ty.getSizeInBits() == 1;
   }
 
   const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
@@ -95,29 +96,34 @@ bool AMDGPUInstructionSelector::isVCC(Register Reg,
 }
 
 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
+  const DebugLoc &DL = I.getDebugLoc();
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   I.setDesc(TII.get(TargetOpcode::COPY));
 
-  // Special case for COPY from the scc register bank.  The scc register bank
-  // is modeled using 32-bit sgprs.
   const MachineOperand &Src = I.getOperand(1);
-  unsigned SrcReg = Src.getReg();
-  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
-    unsigned DstReg = I.getOperand(0).getReg();
-
-    // Specially handle scc->vcc copies.
-    if (isVCC(DstReg, MRI)) {
-      const DebugLoc &DL = I.getDebugLoc();
-      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
-        .addImm(0)
-        .addReg(SrcReg);
-      if (!MRI.getRegClassOrNull(SrcReg))
-        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
-      I.eraseFromParent();
-      return true;
+  MachineOperand &Dst = I.getOperand(0);
+  Register DstReg = Dst.getReg();
+  Register SrcReg = Src.getReg();
+
+  if (isVCC(DstReg, MRI)) {
+    if (SrcReg == AMDGPU::SCC) {
+      const TargetRegisterClass *RC
+        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
+      if (!RC)
+        return true;
+      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
     }
+
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
+      .addImm(0)
+      .addReg(SrcReg);
+
+    if (!MRI.getRegClassOrNull(SrcReg))
+      MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+    I.eraseFromParent();
+    return true;
   }
 
   for (const MachineOperand &MO : I.operands()) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
index 6722f1608e2b7..1e20e6c76ebd5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
@@ -148,9 +148,6 @@ regBankSelected: true
 body: |
   bb.0:
     liveins: $sgpr0
-    ; GCN-LABEL: name: copy_sgpr_no_type
-    ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
-    ; GCN: S_ENDPGM 0, implicit [[COPY]]
     ; WAVE64-LABEL: name: copy_sgpr_no_type
     ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
     ; WAVE64: S_ENDPGM 0, implicit [[COPY]]
@@ -174,9 +171,6 @@ regBankSelected: true
 body: |
   bb.0:
     liveins: $vgpr0
-    ; GCN-LABEL: name: copy_vgpr_no_type
-    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: S_ENDPGM 0, implicit [[COPY]]
     ; WAVE64-LABEL: name: copy_vgpr_no_type
     ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; WAVE64: S_ENDPGM 0, implicit [[COPY]]
@@ -200,9 +194,6 @@ regBankSelected: true
 body: |
   bb.0:
     liveins: $sgpr0_sgpr1
-    ; GCN-LABEL: name: copy_maybe_vcc
-    ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
-    ; GCN: S_ENDPGM 0, implicit [[COPY]]
     ; WAVE64-LABEL: name: copy_maybe_vcc
     ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
     ; WAVE64: S_ENDPGM 0, implicit [[COPY]]
@@ -215,3 +206,39 @@ body: |
     S_ENDPGM 0, implicit %1
 
 ...
+
+---
+
+name:            copy_sgpr_s1_to_vcc
+legalized:       true
+regBankSelected: true
+
+
+body: |
+  ; WAVE64-LABEL: name: copy_sgpr_s1_to_vcc
+  ; WAVE64: bb.0:
+  ; WAVE64:   successors: %bb.1(0x80000000)
+  ; WAVE64:   [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+  ; WAVE64:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+  ; WAVE64:   $vcc = COPY [[V_CMP_NE_U32_e64_]]
+  ; WAVE64:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; WAVE64: bb.1:
+  ; WAVE32-LABEL: name: copy_sgpr_s1_to_vcc
+  ; WAVE32: bb.0:
+  ; WAVE32:   successors: %bb.1(0x80000000)
+  ; WAVE32:   $vcc_hi = IMPLICIT_DEF
+  ; WAVE32:   [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+  ; WAVE32:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+  ; WAVE32:   $vcc_lo = COPY [[V_CMP_NE_U32_e64_]]
+  ; WAVE32:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
+  ; WAVE32: bb.1:
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:vcc(s1) = COPY %1
+    G_BRCOND %2, %bb.1
+
+  bb.1:
+
+...

From 3bfdb54d88d71b8cd8594d22d4b3712ee5661aa5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:45:49 +0000
Subject: [PATCH 150/451] AMDGPU/GlobalISel: Fix not constraining result reg of
 copies to VCC

llvm-svn: 366118
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |  4 +++
 .../AMDGPU/GlobalISel/inst-select-copy.mir    | 26 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 0d02e738b4ba5..f916154c31911 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -116,6 +116,10 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
       return RBI.constrainGenericRegister(DstReg, *RC, MRI);
     }
 
+    // TODO: Should probably leave the copy and let copyPhysReg expand it.
+    if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
+      return false;
+
     BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
       .addImm(0)
       .addReg(SrcReg);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
index 1e20e6c76ebd5..548dde263fffa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
@@ -242,3 +242,29 @@ body: |
   bb.1:
 
 ...
+
+---
+
+name:            copy_sgpr_s1_to_vcc_constrain
+legalized:       true
+regBankSelected: true
+
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; WAVE64-LABEL: name: copy_sgpr_s1_to_vcc_constrain
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
+    ; WAVE32-LABEL: name: copy_sgpr_s1_to_vcc_constrain
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:vcc(s1) = COPY %1
+    S_ENDPGM 0, implicit %2
+
+...

From e1b52f418033548601a7839cbbb8ceddc0751521 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:46:48 +0000
Subject: [PATCH 151/451] AMDGPU/GlobalISel: Fix selecting vcc->vcc bank copies

The extra test change is correct, although how it arrives there is a
bug that needs work. With wave32, the test for isVCC ambiguously
reports true for an SCC or VCC source. A new allocatable pseudo
register class for SCC may be necessary.

llvm-svn: 366119
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 22 ++++++------
 .../AMDGPU/GlobalISel/inst-select-copy.mir    | 34 +++++++++++++++++--
 2 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f916154c31911..f5a742b1e2256 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -116,18 +116,20 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
       return RBI.constrainGenericRegister(DstReg, *RC, MRI);
     }
 
-    // TODO: Should probably leave the copy and let copyPhysReg expand it.
-    if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
-      return false;
+    if (!isVCC(SrcReg, MRI)) {
+      // TODO: Should probably leave the copy and let copyPhysReg expand it.
+      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
+        return false;
 
-    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
-      .addImm(0)
-      .addReg(SrcReg);
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
+        .addImm(0)
+        .addReg(SrcReg);
 
-    if (!MRI.getRegClassOrNull(SrcReg))
-      MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
-    I.eraseFromParent();
-    return true;
+      if (!MRI.getRegClassOrNull(SrcReg))
+        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
+      I.eraseFromParent();
+      return true;
+    }
   }
 
   for (const MachineOperand &MO : I.operands()) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
index 548dde263fffa..81ffcd5464c48 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
@@ -90,10 +90,10 @@ body: |
     ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
     ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+    ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]]
+    ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY4]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
-    ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
-    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
-    ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
+    ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
     ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
@@ -268,3 +268,31 @@ body: |
     S_ENDPGM 0, implicit %2
 
 ...
+
+---
+
+name:            copy_s1_vcc_to_vcc
+legalized:       true
+regBankSelected: true
+
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; WAVE64-LABEL: name: copy_s1_vcc_to_vcc
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
+    ; WAVE32-LABEL: name: copy_s1_vcc_to_vcc
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:vcc(s1) = COPY %1
+    %3:vcc(s1) = COPY %2
+    S_ENDPGM 0, implicit %3
+
+...

From ad19b50c000762c2c7c50ad8f171cbc60ca060d0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:48:36 +0000
Subject: [PATCH 152/451] AMDGPU/GlobalISel: Don't constrain source register of
 VCC copies

This is a hack until I come up with a better way of dealing with the
pseudo-register banks used for boolean values. If the use instruction
constrains the register, the selector for the def instruction won't
see that the bank was VCC. A 1-bit SReg_32 could ambiguously have
been SCCRegBank or VCCRegBank in wave32.

This is necessary to successfully select branches with an and/or/xor
condition.

llvm-svn: 366120
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 20 ++++++++++++
 .../AMDGPU/GlobalISel/inst-select-copy.mir    | 31 ++++++++++++++++---
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f5a742b1e2256..e57478b00ee0d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -130,6 +130,26 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
       I.eraseFromParent();
       return true;
     }
+
+    const TargetRegisterClass *RC =
+      TRI.getConstrainedRegClassForOperand(Dst, MRI);
+    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
+      return false;
+
+    // Don't constrain the source register to a class so the def instruction
+    // handles it (unless it's undef).
+    //
+    // FIXME: This is a hack. When selecting the def, we neeed to know
+    // specifically know that the result is VCCRegBank, and not just an SGPR
+    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
+    if (Src.isUndef()) {
+      const TargetRegisterClass *SrcRC =
+        TRI.getConstrainedRegClassForOperand(Src, MRI);
+      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
+        return false;
+    }
+
+    return true;
   }
 
   for (const MachineOperand &MO : I.operands()) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
index 81ffcd5464c48..2f2ad31cd0ad7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
@@ -282,13 +282,15 @@ body: |
 
     ; WAVE64-LABEL: name: copy_s1_vcc_to_vcc
     ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
-    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
-    ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_NE_U32_e64_]]
+    ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
     ; WAVE32-LABEL: name: copy_s1_vcc_to_vcc
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
-    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
-    ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_NE_U32_e64_]]
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[V_CMP_NE_U32_e64_]]
+    ; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s1) = G_TRUNC %0
     %2:vcc(s1) = COPY %1
@@ -296,3 +298,24 @@ body: |
     S_ENDPGM 0, implicit %3
 
 ...
+
+---
+
+name:            copy_s1_vcc_to_vcc_undef
+legalized:       true
+regBankSelected: true
+
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; WAVE64-LABEL: name: copy_s1_vcc_to_vcc_undef
+    ; WAVE64: S_ENDPGM 0, implicit %1:sreg_64_xexec
+    ; WAVE32-LABEL: name: copy_s1_vcc_to_vcc_undef
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: S_ENDPGM 0, implicit %1:sreg_32_xm0_xexec
+    %1:vcc(s1) = COPY undef  %0:vcc(s1)
+    S_ENDPGM 0, implicit %1
+
+...

From c8291c94f8393162241a798df49ba53fddc49976 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 19:50:07 +0000
Subject: [PATCH 153/451] AMDGPU/GlobalISel: Select G_AND/G_OR/G_XOR

llvm-svn: 366121
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |  65 ++
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |   1 +
 .../AMDGPU/GlobalISel/inst-select-and.mir     | 593 +++++++++++++++++
 .../AMDGPU/GlobalISel/inst-select-or.mir      | 600 +++++++++++++++++-
 .../AMDGPU/GlobalISel/inst-select-xor.mir     | 593 +++++++++++++++++
 5 files changed, 1828 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index e57478b00ee0d..fd5ee293d6cfb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -67,6 +67,8 @@ static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
   const TargetRegisterClass *RC =
       RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
   if (RC) {
+    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
+    // context of the register bank has been lost.
     if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
       return false;
     const LLT Ty = MRI.getType(Reg);
@@ -244,6 +246,63 @@ static int64_t getConstant(const MachineInstr *MI) {
   return MI->getOperand(1).getCImm()->getSExtValue();
 }
 
+static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
+  switch (Opc) {
+  case AMDGPU::G_AND:
+    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+  case AMDGPU::G_OR:
+    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
+  case AMDGPU::G_XOR:
+    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
+  default:
+    llvm_unreachable("not a bit op");
+  }
+}
+
+bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineOperand &Dst = I.getOperand(0);
+  MachineOperand &Src0 = I.getOperand(1);
+  MachineOperand &Src1 = I.getOperand(2);
+  Register DstReg = Dst.getReg();
+  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
+    const TargetRegisterClass *RC = TRI.getBoolRC();
+    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
+                                           RC == &AMDGPU::SReg_64RegClass);
+    I.setDesc(TII.get(InstOpc));
+
+    // FIXME: Hack to avoid turning the register bank into a register class.
+    // The selector for G_ICMP relies on seeing the register bank for the result
+    // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
+    // be ambiguous whether it's a scalar or vector bool.
+    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
+      MRI.setRegClass(Src0.getReg(), RC);
+    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
+      MRI.setRegClass(Src1.getReg(), RC);
+
+    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
+  }
+
+  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
+  // the result?
+  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
+    const TargetRegisterClass *RC
+      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
+    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
+    I.setDesc(TII.get(InstOpc));
+    return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
+           RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
+           RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
+  }
+
+  return false;
+}
+
 bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
@@ -1293,6 +1352,12 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   }
 
   switch (I.getOpcode()) {
+  case TargetOpcode::G_AND:
+  case TargetOpcode::G_OR:
+  case TargetOpcode::G_XOR:
+    if (selectG_AND_OR_XOR(I))
+      return true;
+    return selectImpl(I, CoverageInfo);
   case TargetOpcode::G_ADD:
   case TargetOpcode::G_SUB:
     if (selectG_ADD_SUB(I))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 1c1ddde040ad4..1027a0b5683d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -73,6 +73,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   bool selectG_TRUNC(MachineInstr &I) const;
   bool selectG_SZA_EXT(MachineInstr &I) const;
   bool selectG_CONSTANT(MachineInstr &I) const;
+  bool selectG_AND_OR_XOR(MachineInstr &I) const;
   bool selectG_ADD_SUB(MachineInstr &I) const;
   bool selectG_EXTRACT(MachineInstr &I) const;
   bool selectG_MERGE_VALUES(MachineInstr &I) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
new file mode 100644
index 0000000000000..f00452b5564ec
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
@@ -0,0 +1,593 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE32 %s
+
+---
+
+name:            and_s1_vcc_vcc_vcc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: and_s1_vcc_vcc_vcc
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    ; WAVE32-LABEL: name: and_s1_vcc_vcc_vcc
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_CONSTANT i32 0
+    %3:vcc(s1) = G_ICMP intpred(eq), %0, %2
+    %4:vcc(s1) = G_ICMP intpred(eq), %1, %2
+    %5:vcc(s1) = G_AND %3, %4
+    S_ENDPGM 0, implicit %5
+...
+
+---
+
+name:            and_s1_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: and_s1_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; WAVE32-LABEL: name: and_s1_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s1) = G_TRUNC %0
+    %3:sgpr(s1) = G_TRUNC %1
+    %4:sgpr(s1) = G_AND %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            and_s1_scc_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: and_s1_scc_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[AND]](s1)
+    ; WAVE32-LABEL: name: and_s1_scc_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[AND]](s1)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s1) = G_TRUNC %0
+    %3:sgpr(s1) = G_TRUNC %1
+    %4:scc(s1) = G_AND %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            and_s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: and_s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[AND]](s16)
+    ; WAVE32-LABEL: name: and_s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[AND]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:sgpr(s16) = G_TRUNC %1
+    %4:sgpr(s16) = G_AND %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            and_s16_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: and_s16_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[AND]](s16)
+    ; WAVE32-LABEL: name: and_s16_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[AND]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vgpr(s16) = G_AND %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            and_s32_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: and_s32_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; WAVE32-LABEL: name: and_s32_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = G_AND %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_s64_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: and_s64_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    ; WAVE32-LABEL: name: and_s64_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s64) = COPY $sgpr2_sgpr3
+    %2:sgpr(s64) = G_AND %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_v2s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: and_v2s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; WAVE32-LABEL: name: and_v2s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr1
+    %2:sgpr(<2 x s16>) = G_AND %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_v2s32_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: and_v2s32_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    ; WAVE32-LABEL: name: and_v2s32_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
+    %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3
+    %2:sgpr(<2 x s32>) = G_AND %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_v4s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: and_v4s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    ; WAVE32-LABEL: name: and_v4s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
+    %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3
+    %2:sgpr(<4 x s16>) = G_AND %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_s32_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: and_s32_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+    ; WAVE32-LABEL: name: and_s32_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_AND %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_v2s16_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: and_v2s16_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; WAVE64: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[AND]](<2 x s16>)
+    ; WAVE32-LABEL: name: and_v2s16_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; WAVE32: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[AND]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = G_AND %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+
+# This should fail to select
+---
+
+name:            and_s64_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: and_s64_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE64: [[AND:%[0-9]+]]:vgpr(s64) = G_AND [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[AND]](s64)
+    ; WAVE32-LABEL: name: and_s64_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE32: [[AND:%[0-9]+]]:vgpr(s64) = G_AND [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[AND]](s64)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vgpr(s64) = G_AND %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_s1_vcc_undef_vcc_undef_vcc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: and_s1_vcc_undef_vcc_undef_vcc
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 undef %1:sreg_64, undef %2:sreg_64
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    ; WAVE32-LABEL: name: and_s1_vcc_undef_vcc_undef_vcc
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    %2:vcc(s1) = G_AND undef %0:vcc(s1), undef %1:vcc(s1)
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_s1_sgpr_undef_sgpr_undef_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: and_s1_sgpr_undef_sgpr_undef_sgpr
+    ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; WAVE32-LABEL: name: and_s1_sgpr_undef_sgpr_undef_sgpr
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    %2:sgpr(s1) = G_AND undef %0:sgpr(s1), undef %1:sgpr(s1)
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_s1_vgpr_undef_vgpr_undef_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: and_s1_vgpr_undef_vgpr_undef_vgpr
+    ; WAVE64: [[AND:%[0-9]+]]:vgpr(s1) = G_AND undef %1:vgpr, undef %2:vgpr
+    ; WAVE64: S_ENDPGM 0, implicit [[AND]](s1)
+    ; WAVE32-LABEL: name: and_s1_vgpr_undef_vgpr_undef_vgpr
+    ; WAVE32: [[AND:%[0-9]+]]:vgpr(s1) = G_AND undef %1:vgpr, undef %2:vgpr
+    ; WAVE32: S_ENDPGM 0, implicit [[AND]](s1)
+    %2:vgpr(s1) = G_AND undef %0:vgpr(s1), undef %1:vgpr(s1)
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            and_s1_vcc_copy_to_vcc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: and_s1_vcc_copy_to_vcc
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B64_]]
+    ; WAVE32-LABEL: name: and_s1_vcc_copy_to_vcc
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s1) = G_TRUNC %0
+    %3:vgpr(s1) = G_TRUNC %1
+    %4:vcc(s1) = COPY %2
+    %5:vcc(s1) = COPY %3
+    %6:vcc(s1) = G_AND %4, %5
+    S_ENDPGM 0, implicit %6
+...
+
+# Selecting the COPY of the G_AND result may constrain the G_AND's result
+# register, losing the information that it was in a VCCRegBank context.
+
+# Works for wave32, should fail for wave64
+---
+name:            copy_select_constrain_vcc_result_reg_wave32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
+    ; WAVE64: liveins: $vgpr0
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
+    ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; WAVE64: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+    ; WAVE64: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; WAVE64: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
+    ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_64_xexec(s1) = S_AND_B32 [[COPY1]](s1), [[COPY2]](s1)
+    ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0(s1) = COPY [[S_AND_B32_]](s1)
+    ; WAVE64: S_ENDPGM 0, implicit [[COPY3]](s1)
+    ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
+    ; WAVE32: liveins: $vgpr0
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[S_MOV_B32_]], implicit $exec
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    %1:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s1) = G_TRUNC %1(s32)
+    %2:sgpr(s1) = G_CONSTANT i1 true
+    %6:sgpr(s32) = G_CONSTANT i32 0
+    %7:sgpr(p1) = G_IMPLICIT_DEF
+    %9:vcc(s1) = COPY %0(s1)
+    %10:vcc(s1) = COPY %2(s1)
+    %8:vcc(s1) = G_AND %9, %10
+    %3:sreg_32_xm0(s1) = COPY %8(s1)
+    S_ENDPGM 0, implicit %3
+
+...
+
+# Works for wave64, should fail for wave32
+---
+name:            copy_select_constrain_vcc_result_reg_wave64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
+    ; WAVE64: liveins: $vgpr0
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_MOV_B32_]], implicit $exec
+    ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B64_]]
+    ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
+    ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
+    ; WAVE32: liveins: $vgpr0
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
+    ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; WAVE32: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+    ; WAVE32: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; WAVE32: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 [[COPY1]](s1), [[COPY2]](s1)
+    ; WAVE32: [[COPY3:%[0-9]+]]:sreg_64_xexec(s1) = COPY [[S_AND_B32_]](s1)
+    ; WAVE32: S_ENDPGM 0, implicit [[COPY3]](s1)
+    %1:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s1) = G_TRUNC %1(s32)
+    %2:sgpr(s1) = G_CONSTANT i1 true
+    %6:sgpr(s32) = G_CONSTANT i32 0
+    %7:sgpr(p1) = G_IMPLICIT_DEF
+    %9:vcc(s1) = COPY %0(s1)
+    %10:vcc(s1) = COPY %2(s1)
+    %8:vcc(s1) = G_AND %9, %10
+    %3:sreg_64_xexec(s1) = COPY %8(s1)
+    S_ENDPGM 0, implicit %3
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
index 2678db1e67f1d..4e120e7c7dd19 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
@@ -1,41 +1,593 @@
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s  | FileCheck %s -check-prefixes=GCN
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE32 %s
 
 ---
 
-name:            or
+name:            or_s1_vcc_vcc_vcc
 legalized:       true
 regBankSelected: true
+tracksRegLiveness: true
 
-# GCN-LABEL: name: or
 body: |
   bb.0:
-    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
-    ; GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-    ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: or_s1_vcc_vcc_vcc
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    ; WAVE32-LABEL: name: or_s1_vcc_vcc_vcc
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_CONSTANT i32 0
+    %3:vcc(s1) = G_ICMP intpred(eq), %0, %2
+    %4:vcc(s1) = G_ICMP intpred(eq), %1, %2
+    %5:vcc(s1) = G_OR %3, %4
+    S_ENDPGM 0, implicit %5
+...
+
+---
+
+name:            or_s1_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: or_s1_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    ; WAVE32-LABEL: name: or_s1_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s32) = COPY $sgpr1
-    %2:vgpr(s32) = COPY $vgpr0
-    %3:vgpr(p1) = COPY $vgpr3_vgpr4
-    %4:sgpr(s32) = G_CONSTANT i32 1
-    %5:sgpr(s32) = G_CONSTANT i32 4096
+    %2:sgpr(s1) = G_TRUNC %0
+    %3:sgpr(s1) = G_TRUNC %1
+    %4:sgpr(s1) = G_OR %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            or_s1_scc_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: or_s1_scc_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[OR:%[0-9]+]]:scc(s1) = G_OR [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[OR]](s1)
+    ; WAVE32-LABEL: name: or_s1_scc_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[OR:%[0-9]+]]:scc(s1) = G_OR [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[OR]](s1)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s1) = G_TRUNC %0
+    %3:sgpr(s1) = G_TRUNC %1
+    %4:scc(s1) = G_OR %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            or_s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: or_s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[OR:%[0-9]+]]:sgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[OR]](s16)
+    ; WAVE32-LABEL: name: or_s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[OR:%[0-9]+]]:sgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[OR]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:sgpr(s16) = G_TRUNC %1
+    %4:sgpr(s16) = G_OR %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            or_s16_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: or_s16_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[OR:%[0-9]+]]:vgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[OR]](s16)
+    ; WAVE32-LABEL: name: or_s16_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[OR:%[0-9]+]]:vgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[OR]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vgpr(s16) = G_OR %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            or_s32_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: or_s32_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    ; WAVE32-LABEL: name: or_s32_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = G_OR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            or_s64_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: or_s64_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    ; WAVE32-LABEL: name: or_s64_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s64) = COPY $sgpr2_sgpr3
+    %2:sgpr(s64) = G_OR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            or_v2s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: or_v2s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    ; WAVE32-LABEL: name: or_v2s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr1
+    %2:sgpr(<2 x s16>) = G_OR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            or_v2s32_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: or_v2s32_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    ; WAVE32-LABEL: name: or_v2s32_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
+    %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3
+    %2:sgpr(<2 x s32>) = G_OR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
 
-    ; or ss
-    ; GCN: [[SS:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[SGPR0]], [[SGPR1]]
-    %6:sgpr(s32) = G_OR %0, %1
+name:            or_v4s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
 
-    ; or vs
-    ; GCN: [[VS:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[SS]], [[VGPR0]]
-    %7:vgpr(s32) = G_OR %2, %6
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: or_v4s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    ; WAVE32-LABEL: name: or_v4s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
+    %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3
+    %2:sgpr(<4 x s16>) = G_OR %0, %1
+    S_ENDPGM 0, implicit %2
+...
 
-    ; or sv
-    ; GCN: [[SV:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[SS]], [[VS]]
-    %8:vgpr(s32) = G_OR %6, %7
+---
+
+name:            or_s32_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: or_s32_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_OR_B32_e32_]]
+    ; WAVE32-LABEL: name: or_s32_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_OR_B32_e32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_OR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            or_v2s16_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: or_v2s16_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; WAVE64: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[OR]](<2 x s16>)
+    ; WAVE32-LABEL: name: or_v2s16_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; WAVE32: [[OR:%[0-9]+]]:vgpr(<2 x s16>) = G_OR [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[OR]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = G_OR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+
+# The s64 VGPR G_OR below should fail to select (the checks expect it to remain a generic G_OR).
+---
+
+name:            or_s64_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: or_s64_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE64: [[OR:%[0-9]+]]:vgpr(s64) = G_OR [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[OR]](s64)
+    ; WAVE32-LABEL: name: or_s64_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE32: [[OR:%[0-9]+]]:vgpr(s64) = G_OR [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[OR]](s64)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vgpr(s64) = G_OR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            or_s1_vcc_undef_vcc_undef_vcc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: or_s1_vcc_undef_vcc_undef_vcc
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 undef %1:sreg_64, undef %2:sreg_64
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    ; WAVE32-LABEL: name: or_s1_vcc_undef_vcc_undef_vcc
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    %2:vcc(s1) = G_OR undef %0:vcc(s1), undef %1:vcc(s1)
+    S_ENDPGM 0, implicit %2
+...
 
-    ; or vv
-    ; GCN: [[VV:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[SV]], [[VGPR0]]
-    %9:vgpr(s32) = G_OR %8, %2
+---
+
+name:            or_s1_sgpr_undef_sgpr_undef_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: or_s1_sgpr_undef_sgpr_undef_sgpr
+    ; WAVE64: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    ; WAVE32-LABEL: name: or_s1_sgpr_undef_sgpr_undef_sgpr
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    %2:sgpr(s1) = G_OR undef %0:sgpr(s1), undef %1:sgpr(s1)
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            or_s1_vgpr_undef_vgpr_undef_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: or_s1_vgpr_undef_vgpr_undef_vgpr
+    ; WAVE64: [[OR:%[0-9]+]]:vgpr(s1) = G_OR undef %1:vgpr, undef %2:vgpr
+    ; WAVE64: S_ENDPGM 0, implicit [[OR]](s1)
+    ; WAVE32-LABEL: name: or_s1_vgpr_undef_vgpr_undef_vgpr
+    ; WAVE32: [[OR:%[0-9]+]]:vgpr(s1) = G_OR undef %1:vgpr, undef %2:vgpr
+    ; WAVE32: S_ENDPGM 0, implicit [[OR]](s1)
+    %2:vgpr(s1) = G_OR undef %0:vgpr(s1), undef %1:vgpr(s1)
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            or_s1_vcc_copy_to_vcc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: or_s1_vcc_copy_to_vcc
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B64_]]
+    ; WAVE32-LABEL: name: or_s1_vcc_copy_to_vcc
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s1) = G_TRUNC %0
+    %3:vgpr(s1) = G_TRUNC %1
+    %4:vcc(s1) = COPY %2
+    %5:vcc(s1) = COPY %3
+    %6:vcc(s1) = G_OR %4, %5
+    S_ENDPGM 0, implicit %6
+...
+
+# Selecting the COPY of the G_OR result may constrain the G_OR's result
+# register, losing the information that it was in a VCCRegBank context.
+
+# Works for wave32, should fail for wave64
+---
+name:            copy_select_constrain_vcc_result_reg_wave32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
+    ; WAVE64: liveins: $vgpr0
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
+    ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; WAVE64: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+    ; WAVE64: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; WAVE64: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
+    ; WAVE64: [[S_OR_B32_:%[0-9]+]]:sreg_64_xexec(s1) = S_OR_B32 [[COPY1]](s1), [[COPY2]](s1)
+    ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0(s1) = COPY [[S_OR_B32_]](s1)
+    ; WAVE64: S_ENDPGM 0, implicit [[COPY3]](s1)
+    ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
+    ; WAVE32: liveins: $vgpr0
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[S_MOV_B32_]], implicit $exec
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
+    %1:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s1) = G_TRUNC %1(s32)
+    %2:sgpr(s1) = G_CONSTANT i1 true
+    %6:sgpr(s32) = G_CONSTANT i32 0
+    %7:sgpr(p1) = G_IMPLICIT_DEF
+    %9:vcc(s1) = COPY %0(s1)
+    %10:vcc(s1) = COPY %2(s1)
+    %8:vcc(s1) = G_OR %9, %10
+    %3:sreg_32_xm0(s1) = COPY %8(s1)
+    S_ENDPGM 0, implicit %3
+
+...
+
+# Works for wave64, should fail for wave32
+---
+name:            copy_select_constrain_vcc_result_reg_wave64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
 
-    G_STORE %9, %3 :: (store 4, addrspace 1)
+    ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
+    ; WAVE64: liveins: $vgpr0
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_MOV_B32_]], implicit $exec
+    ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]]
+    ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
+    ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
+    ; WAVE32: liveins: $vgpr0
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
+    ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; WAVE32: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+    ; WAVE32: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; WAVE32: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[COPY1]](s1), [[COPY2]](s1)
+    ; WAVE32: [[COPY3:%[0-9]+]]:sreg_64_xexec(s1) = COPY [[S_OR_B32_]](s1)
+    ; WAVE32: S_ENDPGM 0, implicit [[COPY3]](s1)
+    %1:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s1) = G_TRUNC %1(s32)
+    %2:sgpr(s1) = G_CONSTANT i1 true
+    %6:sgpr(s32) = G_CONSTANT i32 0
+    %7:sgpr(p1) = G_IMPLICIT_DEF
+    %9:vcc(s1) = COPY %0(s1)
+    %10:vcc(s1) = COPY %2(s1)
+    %8:vcc(s1) = G_OR %9, %10
+    %3:sreg_64_xexec(s1) = COPY %8(s1)
+    S_ENDPGM 0, implicit %3
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
new file mode 100644
index 0000000000000..3035e022646f0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
@@ -0,0 +1,593 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s  | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr="+wavefrontsize32" -run-pass=instruction-select -global-isel-abort=0 -verify-machineinstrs -o - %s  | FileCheck -check-prefix=WAVE32 %s
+
+---
+
+name:            xor_s1_vcc_vcc_vcc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: xor_s1_vcc_vcc_vcc
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    ; WAVE32-LABEL: name: xor_s1_vcc_vcc_vcc
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_CONSTANT i32 0
+    %3:vcc(s1) = G_ICMP intpred(eq), %0, %2
+    %4:vcc(s1) = G_ICMP intpred(eq), %1, %2
+    %5:vcc(s1) = G_XOR %3, %4
+    S_ENDPGM 0, implicit %5
+...
+
+---
+
+name:            xor_s1_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: xor_s1_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    ; WAVE32-LABEL: name: xor_s1_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s1) = G_TRUNC %0
+    %3:sgpr(s1) = G_TRUNC %1
+    %4:sgpr(s1) = G_XOR %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            xor_s1_scc_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: xor_s1_scc_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[XOR:%[0-9]+]]:scc(s1) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[XOR]](s1)
+    ; WAVE32-LABEL: name: xor_s1_scc_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[XOR:%[0-9]+]]:scc(s1) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[XOR]](s1)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s1) = G_TRUNC %0
+    %3:sgpr(s1) = G_TRUNC %1
+    %4:scc(s1) = G_XOR %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            xor_s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: xor_s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[XOR:%[0-9]+]]:sgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[XOR]](s16)
+    ; WAVE32-LABEL: name: xor_s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[XOR:%[0-9]+]]:sgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[XOR]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:sgpr(s16) = G_TRUNC %1
+    %4:sgpr(s16) = G_XOR %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            xor_s16_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: xor_s16_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE64: [[XOR:%[0-9]+]]:vgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[XOR]](s16)
+    ; WAVE32-LABEL: name: xor_s16_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; WAVE32: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+    ; WAVE32: [[XOR:%[0-9]+]]:vgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[XOR]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_TRUNC %1
+    %4:vgpr(s16) = G_XOR %2, %3
+    S_ENDPGM 0, implicit %4
+...
+
+---
+
+name:            xor_s32_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: xor_s32_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    ; WAVE32-LABEL: name: xor_s32_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = G_XOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_s64_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: xor_s64_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    ; WAVE32-LABEL: name: xor_s64_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s64) = COPY $sgpr2_sgpr3
+    %2:sgpr(s64) = G_XOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_v2s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; WAVE64-LABEL: name: xor_v2s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0, $sgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    ; WAVE32-LABEL: name: xor_v2s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0, $sgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr1
+    %2:sgpr(<2 x s16>) = G_XOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_v2s32_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: xor_v2s32_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    ; WAVE32-LABEL: name: xor_v2s32_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    %0:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1
+    %1:sgpr(<2 x s32>) = COPY $sgpr2_sgpr3
+    %2:sgpr(<2 x s32>) = G_XOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_v4s16_sgpr_sgpr_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64-LABEL: name: xor_v4s16_sgpr_sgpr_sgpr
+    ; WAVE64: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    ; WAVE32-LABEL: name: xor_v4s16_sgpr_sgpr_sgpr
+    ; WAVE32: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+    ; WAVE32: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
+    %1:sgpr(<4 x s16>) = COPY $sgpr2_sgpr3
+    %2:sgpr(<4 x s16>) = G_XOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_s32_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: xor_s32_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]]
+    ; WAVE32-LABEL: name: xor_s32_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_XOR_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_XOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_v2s16_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; WAVE64: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[XOR]](<2 x s16>)
+    ; WAVE32-LABEL: name: xor_v2s16_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; WAVE32: [[XOR:%[0-9]+]]:vgpr(<2 x s16>) = G_XOR [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[XOR]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = G_XOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+
+# This should fail to select
+---
+
+name:            xor_s64_vgpr_vgpr_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64-LABEL: name: xor_s64_vgpr_vgpr_vgpr
+    ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE64: [[XOR:%[0-9]+]]:vgpr(s64) = G_XOR [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[XOR]](s64)
+    ; WAVE32-LABEL: name: xor_s64_vgpr_vgpr_vgpr
+    ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; WAVE32: [[XOR:%[0-9]+]]:vgpr(s64) = G_XOR [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[XOR]](s64)
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vgpr(s64) = G_XOR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_s1_vcc_undef_vcc_undef_vcc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: xor_s1_vcc_undef_vcc_undef_vcc
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 undef %1:sreg_64, undef %2:sreg_64
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    ; WAVE32-LABEL: name: xor_s1_vcc_undef_vcc_undef_vcc
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    %2:vcc(s1) = G_XOR undef %0:vcc(s1), undef %1:vcc(s1)
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_s1_sgpr_undef_sgpr_undef_sgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: xor_s1_sgpr_undef_sgpr_undef_sgpr
+    ; WAVE64: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    ; WAVE32-LABEL: name: xor_s1_sgpr_undef_sgpr_undef_sgpr
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 undef %1:sreg_32_xm0, undef %2:sreg_32_xm0
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    %2:sgpr(s1) = G_XOR undef %0:sgpr(s1), undef %1:sgpr(s1)
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_s1_vgpr_undef_vgpr_undef_vgpr
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: xor_s1_vgpr_undef_vgpr_undef_vgpr
+    ; WAVE64: [[XOR:%[0-9]+]]:vgpr(s1) = G_XOR undef %1:vgpr, undef %2:vgpr
+    ; WAVE64: S_ENDPGM 0, implicit [[XOR]](s1)
+    ; WAVE32-LABEL: name: xor_s1_vgpr_undef_vgpr_undef_vgpr
+    ; WAVE32: [[XOR:%[0-9]+]]:vgpr(s1) = G_XOR undef %1:vgpr, undef %2:vgpr
+    ; WAVE32: S_ENDPGM 0, implicit [[XOR]](s1)
+    %2:vgpr(s1) = G_XOR undef %0:vgpr(s1), undef %1:vgpr(s1)
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            xor_s1_vcc_copy_to_vcc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; WAVE64-LABEL: name: xor_s1_vcc_copy_to_vcc
+    ; WAVE64: liveins: $vgpr0, $vgpr1
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B64_]]
+    ; WAVE32-LABEL: name: xor_s1_vcc_copy_to_vcc
+    ; WAVE32: liveins: $vgpr0, $vgpr1
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY1]], implicit $exec
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s1) = G_TRUNC %0
+    %3:vgpr(s1) = G_TRUNC %1
+    %4:vcc(s1) = COPY %2
+    %5:vcc(s1) = COPY %3
+    %6:vcc(s1) = G_XOR %4, %5
+    S_ENDPGM 0, implicit %6
+...
+
+# The selector for the copy of the xor result may constrain the result
+# register of the xor, losing that it is a VCCRegBank context.
+
+# Works for wave32, should fail for wave64
+---
+name:            copy_select_constrain_vcc_result_reg_wave32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
+    ; WAVE64: liveins: $vgpr0
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE64: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE64: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
+    ; WAVE64: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; WAVE64: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+    ; WAVE64: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; WAVE64: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
+    ; WAVE64: [[S_XOR_B32_:%[0-9]+]]:sreg_64_xexec(s1) = S_XOR_B32 [[COPY1]](s1), [[COPY2]](s1)
+    ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0(s1) = COPY [[S_XOR_B32_]](s1)
+    ; WAVE64: S_ENDPGM 0, implicit [[COPY3]](s1)
+    ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
+    ; WAVE32: liveins: $vgpr0
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
+    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0 = V_CMP_NE_U32_e64 0, [[S_MOV_B32_]], implicit $exec
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
+    %1:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s1) = G_TRUNC %1(s32)
+    %2:sgpr(s1) = G_CONSTANT i1 true
+    %6:sgpr(s32) = G_CONSTANT i32 0
+    %7:sgpr(p1) = G_IMPLICIT_DEF
+    %9:vcc(s1) = COPY %0(s1)
+    %10:vcc(s1) = COPY %2(s1)
+    %8:vcc(s1) = G_XOR %9, %10
+    %3:sreg_32_xm0(s1) = COPY %8(s1)
+    S_ENDPGM 0, implicit %3
+
+...
+
+# Works for wave64, should fail for wave32
+---
+name:            copy_select_constrain_vcc_result_reg_wave64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
+    ; WAVE64: liveins: $vgpr0
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
+    ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+    ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_MOV_B32_]], implicit $exec
+    ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]]
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B64_]]
+    ; WAVE64: S_ENDPGM 0, implicit [[COPY1]]
+    ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
+    ; WAVE32: liveins: $vgpr0
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; WAVE32: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; WAVE32: [[C:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 true
+    ; WAVE32: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; WAVE32: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
+    ; WAVE32: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; WAVE32: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[C]](s1)
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_XOR_B32 [[COPY1]](s1), [[COPY2]](s1)
+    ; WAVE32: [[COPY3:%[0-9]+]]:sreg_64_xexec(s1) = COPY [[S_XOR_B32_]](s1)
+    ; WAVE32: S_ENDPGM 0, implicit [[COPY3]](s1)
+    %1:vgpr(s32) = COPY $vgpr0
+    %0:vgpr(s1) = G_TRUNC %1(s32)
+    %2:sgpr(s1) = G_CONSTANT i1 true
+    %6:sgpr(s32) = G_CONSTANT i32 0
+    %7:sgpr(p1) = G_IMPLICIT_DEF
+    %9:vcc(s1) = COPY %0(s1)
+    %10:vcc(s1) = COPY %2(s1)
+    %8:vcc(s1) = G_XOR %9, %10
+    %3:sreg_64_xexec(s1) = COPY %8(s1)
+    S_ENDPGM 0, implicit %3
+
+...

From c5d7b0c4547263fd859e0c7be535575858fd312a Mon Sep 17 00:00:00 2001
From: Eric Fiselier <eric@efcs.ca>
Date: Mon, 15 Jul 2019 19:53:42 +0000
Subject: [PATCH 154/451] Constrain workaround to avoid affecting other
 buildbots

llvm-svn: 366122
---
 libcxx/CMakeLists.txt | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index fd74c2742e0a9..5df3f257294e3 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -151,8 +151,10 @@ set_property(CACHE LIBCXX_CXX_ABI PROPERTY STRINGS ;${CXXABIS})
 # Without this all the bots fail while building libc++
 if (DEFINED ENV{USER})
   if (("$ENV{USER}" STREQUAL "buildbot") OR (("$ENV{USER}" STREQUAL "llvmbb") OR ("$ENV{USER}" STREQUAL "buildslave")))
-    message(WARNING "OVERRIDING BUILDBOT CONFIG")
-    set(LIBCXX_CXX_ABI "default" CACHE STRING "FIXME" FORCE)
+    if (LIBCXX_CXX_ABI STREQUAL "libcxxabi" AND NOT DEFINED LIBCXX_CXX_ABI_INCLUDE_PATHS)
+      message(WARNING "OVERRIDING BUILDBOT CONFIG")
+      set(LIBCXX_CXX_ABI "default" CACHE STRING "FIXME" FORCE)
+    endif()
   endif()
 endif()
 # Setup the default options if LIBCXX_CXX_ABI is not specified.

From c5e7f5624966aa4a83869ca4fceb4d7b96a22d34 Mon Sep 17 00:00:00 2001
From: Evgeniy Stepanov <eugeni.stepanov@gmail.com>
Date: Mon, 15 Jul 2019 20:02:23 +0000
Subject: [PATCH 155/451] ARM MTE stack sanitizer.

Add "memtag" sanitizer that detects and mitigates stack memory issues
using armv8.5 Memory Tagging Extension.

It is similar in principle to HWASan, which is a software implementation
of the same idea, but there are enough differences to warrant a new
sanitizer type IMHO. It is also expected to have very different
performance properties.

The new sanitizer does not have a runtime library (it may grow one
later, along with a "debugging" mode). Similar to SafeStack and
StackProtector, the instrumentation pass (in a follow up change) will be
inserted in all cases, but will only affect functions marked with the
new sanitize_memtag attribute.

Reviewers: pcc, hctim, vitalybuka, ostannard

Subscribers: srhines, mehdi_amini, javed.absar, kristof.beyls, hiraditya, cryptoad, steven_wu, dexonsmith, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D64169

llvm-svn: 366123
---
 clang/include/clang/Basic/Features.def        |  1 +
 clang/include/clang/Basic/Sanitizers.def      |  3 ++
 clang/lib/CodeGen/CGDeclCXX.cpp               |  4 +++
 clang/lib/CodeGen/CodeGenFunction.cpp         |  2 ++
 clang/lib/CodeGen/CodeGenModule.cpp           |  6 ++--
 clang/lib/CodeGen/SanitizerMetadata.cpp       | 21 ++++++-------
 clang/lib/Driver/SanitizerArgs.cpp            | 10 +++++--
 clang/lib/Driver/ToolChains/Linux.cpp         |  2 ++
 clang/test/CodeGen/memtag-attr.cpp            | 19 ++++++++++++
 clang/test/Driver/fsanitize.c                 | 10 +++++++
 .../Lexer/has_feature_memtag_sanitizer.cpp    | 11 +++++++
 clang/test/SemaCXX/attr-no-sanitize.cpp       |  5 ++++
 llvm/docs/BitCodeFormat.rst                   |  1 +
 llvm/docs/LangRef.rst                         |  4 +++
 llvm/include/llvm/Bitcode/LLVMBitCodes.h      |  3 +-
 llvm/include/llvm/IR/Attributes.td            |  4 +++
 llvm/lib/AsmParser/LLLexer.cpp                |  1 +
 llvm/lib/AsmParser/LLParser.cpp               |  4 +++
 llvm/lib/AsmParser/LLToken.h                  |  1 +
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     |  8 ++++-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  2 ++
 llvm/lib/IR/Attributes.cpp                    |  2 ++
 llvm/lib/IR/Verifier.cpp                      |  1 +
 .../lib/Transforms/IPO/ForceFunctionAttrs.cpp |  1 +
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   |  1 +
 llvm/test/Bitcode/attributes.ll               | 11 +++++--
 llvm/test/Transforms/Inline/attributes.ll     | 30 +++++++++++++++++++
 llvm/utils/emacs/llvm-mode.el                 |  2 +-
 28 files changed, 149 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/CodeGen/memtag-attr.cpp
 create mode 100644 clang/test/Lexer/has_feature_memtag_sanitizer.cpp

diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def
index 7ab7c4d17005b..7081c02e83eac 100644
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -42,6 +42,7 @@ FEATURE(address_sanitizer,
 FEATURE(hwaddress_sanitizer,
         LangOpts.Sanitize.hasOneOf(SanitizerKind::HWAddress |
                                    SanitizerKind::KernelHWAddress))
+FEATURE(memtag_sanitizer, LangOpts.Sanitize.has(SanitizerKind::MemTag))
 FEATURE(xray_instrument, LangOpts.XRayInstrument)
 FEATURE(undefined_behavior_sanitizer,
         LangOpts.Sanitize.hasOneOf(SanitizerKind::Undefined))
diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def
index 1bb433a2b0e9b..0037cc2146f26 100644
--- a/clang/include/clang/Basic/Sanitizers.def
+++ b/clang/include/clang/Basic/Sanitizers.def
@@ -55,6 +55,9 @@ SANITIZER("hwaddress", HWAddress)
 // Kernel Hardware-assisted AddressSanitizer (KHWASan)
 SANITIZER("kernel-hwaddress", KernelHWAddress)
 
+// A variant of AddressSanitizer using AArch64 MTE extension.
+SANITIZER("memtag", MemTag)
+
 // MemorySanitizer
 SANITIZER("memory", Memory)
 
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index 1c7c6fb5413db..7a0605b8450ac 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -369,6 +369,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
       !isInSanitizerBlacklist(SanitizerKind::KernelHWAddress, Fn, Loc))
     Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
 
+  if (getLangOpts().Sanitize.has(SanitizerKind::MemTag) &&
+      !isInSanitizerBlacklist(SanitizerKind::MemTag, Fn, Loc))
+    Fn->addFnAttr(llvm::Attribute::SanitizeMemTag);
+
   if (getLangOpts().Sanitize.has(SanitizerKind::Thread) &&
       !isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc))
     Fn->addFnAttr(llvm::Attribute::SanitizeThread);
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 937648700a2e8..eafe26674434f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -696,6 +696,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
     Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
   if (SanOpts.hasOneOf(SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress))
     Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
+  if (SanOpts.has(SanitizerKind::MemTag))
+    Fn->addFnAttr(llvm::Attribute::SanitizeMemTag);
   if (SanOpts.has(SanitizerKind::Thread))
     Fn->addFnAttr(llvm::Attribute::SanitizeThread);
   if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory))
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ba501d645c2c9..7ab960e8bcee5 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2239,9 +2239,11 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV,
                                            SourceLocation Loc, QualType Ty,
                                            StringRef Category) const {
   // For now globals can be blacklisted only in ASan and KASan.
-  const SanitizerMask EnabledAsanMask = LangOpts.Sanitize.Mask &
+  const SanitizerMask EnabledAsanMask =
+      LangOpts.Sanitize.Mask &
       (SanitizerKind::Address | SanitizerKind::KernelAddress |
-       SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress);
+       SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress |
+       SanitizerKind::MemTag);
   if (!EnabledAsanMask)
     return false;
   const auto &SanitizerBL = getContext().getSanitizerBlacklist();
diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp
index 3211a3e74d255..ebc9cd5529bc6 100644
--- a/clang/lib/CodeGen/SanitizerMetadata.cpp
+++ b/clang/lib/CodeGen/SanitizerMetadata.cpp
@@ -20,14 +20,17 @@ using namespace CodeGen;
 
 SanitizerMetadata::SanitizerMetadata(CodeGenModule &CGM) : CGM(CGM) {}
 
+static bool isAsanHwasanOrMemTag(const SanitizerSet& SS) {
+  return SS.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress |
+                     SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress |
+                     SanitizerKind::MemTag);
+}
+
 void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
                                            SourceLocation Loc, StringRef Name,
                                            QualType Ty, bool IsDynInit,
                                            bool IsBlacklisted) {
-  if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                           SanitizerKind::KernelAddress |
-                                           SanitizerKind::HWAddress |
-                                           SanitizerKind::KernelHWAddress))
+  if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
     return;
   IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init");
   IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty);
@@ -58,10 +61,7 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
 
 void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
                                            const VarDecl &D, bool IsDynInit) {
-  if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                           SanitizerKind::KernelAddress |
-                                           SanitizerKind::HWAddress |
-                                           SanitizerKind::KernelHWAddress))
+  if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
     return;
   std::string QualName;
   llvm::raw_string_ostream OS(QualName);
@@ -78,10 +78,7 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
 void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
   // For now, just make sure the global is not modified by the ASan
   // instrumentation.
-  if (CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                          SanitizerKind::KernelAddress |
-                                          SanitizerKind::HWAddress |
-                                          SanitizerKind::KernelHWAddress))
+  if (isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
     reportGlobalToASan(GV, SourceLocation(), "", QualType(), false, true);
 }
 
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 8d4b9fc85de23..9132faa917646 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -40,7 +40,8 @@ static const SanitizerMask NeedsUnwindTables =
 static const SanitizerMask SupportsCoverage =
     SanitizerKind::Address | SanitizerKind::HWAddress |
     SanitizerKind::KernelAddress | SanitizerKind::KernelHWAddress |
-    SanitizerKind::Memory | SanitizerKind::KernelMemory | SanitizerKind::Leak |
+    SanitizerKind::MemTag | SanitizerKind::Memory |
+    SanitizerKind::KernelMemory | SanitizerKind::Leak |
     SanitizerKind::Undefined | SanitizerKind::Integer |
     SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
     SanitizerKind::DataFlow | SanitizerKind::Fuzzer |
@@ -122,6 +123,7 @@ static void addDefaultBlacklists(const Driver &D, SanitizerMask Kinds,
     SanitizerMask Mask;
   } Blacklists[] = {{"asan_blacklist.txt", SanitizerKind::Address},
                     {"hwasan_blacklist.txt", SanitizerKind::HWAddress},
+                    {"memtag_blacklist.txt", SanitizerKind::MemTag},
                     {"msan_blacklist.txt", SanitizerKind::Memory},
                     {"tsan_blacklist.txt", SanitizerKind::Thread},
                     {"dfsan_abilist.txt", SanitizerKind::DataFlow},
@@ -420,7 +422,11 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
                      SanitizerKind::Address | SanitizerKind::HWAddress |
                          SanitizerKind::Leak | SanitizerKind::Thread |
                          SanitizerKind::Memory | SanitizerKind::KernelAddress |
-                         SanitizerKind::Scudo | SanitizerKind::SafeStack)};
+                         SanitizerKind::Scudo | SanitizerKind::SafeStack),
+      std::make_pair(SanitizerKind::MemTag,
+                     SanitizerKind::Address | SanitizerKind::KernelAddress |
+                         SanitizerKind::HWAddress |
+                         SanitizerKind::KernelHWAddress)};
   // Enable toolchain specific default sanitizers if not explicitly disabled.
   SanitizerMask Default = TC.getDefaultSanitizers() & ~AllRemove;
 
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index b4e19b12c8d72..d900508ad9385 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -1026,6 +1026,8 @@ SanitizerMask Linux::getSupportedSanitizers() const {
     Res |= SanitizerKind::HWAddress;
     Res |= SanitizerKind::KernelHWAddress;
   }
+  if (IsAArch64)
+    Res |= SanitizerKind::MemTag;
   return Res;
 }
 
diff --git a/clang/test/CodeGen/memtag-attr.cpp b/clang/test/CodeGen/memtag-attr.cpp
new file mode 100644
index 0000000000000..f0b0785367ba9
--- /dev/null
+++ b/clang/test/CodeGen/memtag-attr.cpp
@@ -0,0 +1,19 @@
+// Make sure the sanitize_memtag attribute is emitted when using MemTag sanitizer.
+// Make sure __attribute__((no_sanitize("memtag"))) disables instrumentation.
+
+// RUN: %clang_cc1 -triple aarch64-unknown-linux -disable-O0-optnone \
+// RUN:   -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NO %s
+
+// RUN: %clang_cc1 -triple aarch64-unknown-linux -fsanitize=memtag \
+// RUN:   -disable-O0-optnone -emit-llvm -o - %s | \
+// RUN:   FileCheck -check-prefix=CHECK-MEMTAG %s
+
+int HasSanitizeMemTag() { return 1; }
+// CHECK-NO: {{Function Attrs: noinline nounwind$}}
+// CHECK-MEMTAG: Function Attrs: noinline nounwind sanitize_memtag
+
+__attribute__((no_sanitize("memtag"))) int NoSanitizeQuoteAddress() {
+  return 0;
+}
+// CHECK-NO: {{Function Attrs: noinline nounwind$}}
+// CHECK-MEMTAG: {{Function Attrs: noinline nounwind$}}
diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c
index a275b576688e6..01367c7e67881 100644
--- a/clang/test/Driver/fsanitize.c
+++ b/clang/test/Driver/fsanitize.c
@@ -181,6 +181,16 @@
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=hwaddress,address -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANHA-SANA
 // CHECK-SANHA-SANA: '-fsanitize=hwaddress' not allowed with '-fsanitize=address'
 
+// RUN: %clang -target aarch64-linux-android -fsanitize=memtag,address -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANMT-SANA
+// CHECK-SANMT-SANA: '-fsanitize=memtag' not allowed with '-fsanitize=address'
+
+// RUN: %clang -target aarch64-linux-android -fsanitize=memtag,hwaddress -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANMT-SANHA
+// CHECK-SANMT-SANHA: '-fsanitize=memtag' not allowed with '-fsanitize=hwaddress'
+
+// RUN: %clang -target i386-linux-android -fsanitize=memtag -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANMT-BAD-ARCH
+// RUN: %clang -target x86_64-linux-android -fsanitize=memtag -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANMT-BAD-ARCH
+// CHECK-SANMT-BAD-ARCH: unsupported option '-fsanitize=memtag' for target
+
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=address -fsanitize-address-use-after-scope %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-SCOPE
 // RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fsanitize-address-use-after-scope -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-SCOPE
 // CHECK-USE-AFTER-SCOPE: -cc1{{.*}}-fsanitize-address-use-after-scope
diff --git a/clang/test/Lexer/has_feature_memtag_sanitizer.cpp b/clang/test/Lexer/has_feature_memtag_sanitizer.cpp
new file mode 100644
index 0000000000000..64e55d7c23cf6
--- /dev/null
+++ b/clang/test/Lexer/has_feature_memtag_sanitizer.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -E -fsanitize=memtag %s -o - | FileCheck --check-prefix=CHECK-MEMTAG %s
+// RUN: %clang_cc1 -E  %s -o - | FileCheck --check-prefix=CHECK-NO-MEMTAG %s
+
+#if __has_feature(memtag_sanitizer)
+int MemTagSanitizerEnabled();
+#else
+int MemTagSanitizerDisabled();
+#endif
+
+// CHECK-MEMTAG: MemTagSanitizerEnabled
+// CHECK-NO-MEMTAG: MemTagSanitizerDisabled
diff --git a/clang/test/SemaCXX/attr-no-sanitize.cpp b/clang/test/SemaCXX/attr-no-sanitize.cpp
index 02bc9a9e7f8f4..feff7ef6163dc 100644
--- a/clang/test/SemaCXX/attr-no-sanitize.cpp
+++ b/clang/test/SemaCXX/attr-no-sanitize.cpp
@@ -30,3 +30,8 @@ int f5() __attribute__((no_sanitize("address", "thread", "hwaddress")));
 // DUMP: NoSanitizeAttr {{.*}} unknown
 // PRINT: int f6() __attribute__((no_sanitize("unknown")))
 int f6() __attribute__((no_sanitize("unknown"))); // expected-warning{{unknown sanitizer 'unknown' ignored}}
+
+// DUMP-LABEL: FunctionDecl {{.*}} f7
+// DUMP: NoSanitizeAttr {{.*}} memtag
+// PRINT: int f7() {{\[\[}}clang::no_sanitize("memtag")]]
+[[clang::no_sanitize("memtag")]] int f7();
diff --git a/llvm/docs/BitCodeFormat.rst b/llvm/docs/BitCodeFormat.rst
index 5e1c5cacb439c..4e653ae55d535 100644
--- a/llvm/docs/BitCodeFormat.rst
+++ b/llvm/docs/BitCodeFormat.rst
@@ -1057,6 +1057,7 @@ The integer codes are mapped to well-known attributes as follows.
 * code 56: ``nocf_check``
 * code 57: ``optforfuzzing``
 * code 58: ``shadowcallstack``
+* code 64: ``sanitize_memtag``
 
 .. note::
   The ``allocsize`` attribute has a special encoding for its arguments. Its two
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 18f760d9b0500..87e8a557504a2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1681,6 +1681,10 @@ example:
     This attribute indicates that HWAddressSanitizer checks
     (dynamic address safety analysis based on tagged pointers) are enabled for
     this function.
+``sanitize_memtag``
+    This attribute indicates that MemTagSanitizer checks
+    (dynamic address safety analysis based on Armv8 MTE) are enabled for
+    this function.
 ``speculative_load_hardening``
     This attribute indicates that
     `Speculative Load Hardening <https://llvm.org/docs/SpeculativeLoadHardening.html>`_
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 4582a6a4d83d6..decd4dd3a9659 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -630,7 +630,8 @@ enum AttributeKindCodes {
   ATTR_KIND_IMMARG = 60,
   ATTR_KIND_WILLRETURN = 61,
   ATTR_KIND_NOFREE = 62,
-  ATTR_KIND_NOSYNC = 63
+  ATTR_KIND_NOSYNC = 63,
+  ATTR_KIND_SANITIZE_MEMTAG = 64,
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index a549f30590028..153046d2311c8 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -185,6 +185,9 @@ def SanitizeMemory : EnumAttr<"sanitize_memory">;
 /// HWAddressSanitizer is on.
 def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress">;
 
+/// MemTagSanitizer is on.
+def SanitizeMemTag : EnumAttr<"sanitize_memtag">;
+
 /// Speculative Load Hardening is enabled.
 ///
 /// Note that this uses the default compatibility (always compatible during
@@ -233,6 +236,7 @@ def : CompatRule<"isEqual<SanitizeAddressAttr>">;
 def : CompatRule<"isEqual<SanitizeThreadAttr>">;
 def : CompatRule<"isEqual<SanitizeMemoryAttr>">;
 def : CompatRule<"isEqual<SanitizeHWAddressAttr>">;
+def : CompatRule<"isEqual<SanitizeMemTagAttr>">;
 def : CompatRule<"isEqual<SafeStackAttr>">;
 def : CompatRule<"isEqual<ShadowCallStackAttr>">;
 
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 2c2361a6abc61..72d2357c29333 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -679,6 +679,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(shadowcallstack);
   KEYWORD(sanitize_address);
   KEYWORD(sanitize_hwaddress);
+  KEYWORD(sanitize_memtag);
   KEYWORD(sanitize_thread);
   KEYWORD(sanitize_memory);
   KEYWORD(speculative_load_hardening);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index ce8c1c4fc8188..87dff6468f2d0 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1311,6 +1311,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
       B.addAttribute(Attribute::SanitizeAddress); break;
     case lltok::kw_sanitize_hwaddress:
       B.addAttribute(Attribute::SanitizeHWAddress); break;
+    case lltok::kw_sanitize_memtag:
+      B.addAttribute(Attribute::SanitizeMemTag); break;
     case lltok::kw_sanitize_thread:
       B.addAttribute(Attribute::SanitizeThread); break;
     case lltok::kw_sanitize_memory:
@@ -1668,6 +1670,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
     case lltok::kw_returns_twice:
     case lltok::kw_sanitize_address:
     case lltok::kw_sanitize_hwaddress:
+    case lltok::kw_sanitize_memtag:
     case lltok::kw_sanitize_memory:
     case lltok::kw_sanitize_thread:
     case lltok::kw_speculative_load_hardening:
@@ -1766,6 +1769,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
     case lltok::kw_returns_twice:
     case lltok::kw_sanitize_address:
     case lltok::kw_sanitize_hwaddress:
+    case lltok::kw_sanitize_memtag:
     case lltok::kw_sanitize_memory:
     case lltok::kw_sanitize_thread:
     case lltok::kw_speculative_load_hardening:
diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h
index 4afe8a6c084c1..0e9ba4db47427 100644
--- a/llvm/lib/AsmParser/LLToken.h
+++ b/llvm/lib/AsmParser/LLToken.h
@@ -176,6 +176,7 @@ enum Kind {
   kw_argmemonly,
   kw_sanitize_address,
   kw_sanitize_hwaddress,
+  kw_sanitize_memtag,
   kw_builtin,
   kw_byval,
   kw_inalloca,
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 6cad3b94e5e74..29dc7f6163929 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1296,6 +1296,9 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
   case Attribute::AllocSize:
     llvm_unreachable("allocsize not supported in raw format");
     break;
+  case Attribute::SanitizeMemTag:
+    llvm_unreachable("sanitize_memtag attribute not supported in raw format");
+    break;
   }
   llvm_unreachable("Unsupported attribute type");
 }
@@ -1305,7 +1308,8 @@ static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) {
 
   for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
        I = Attribute::AttrKind(I + 1)) {
-    if (I == Attribute::Dereferenceable ||
+    if (I == Attribute::SanitizeMemTag ||
+        I == Attribute::Dereferenceable ||
         I == Attribute::DereferenceableOrNull ||
         I == Attribute::ArgMemOnly ||
         I == Attribute::AllocSize ||
@@ -1534,6 +1538,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::ZExt;
   case bitc::ATTR_KIND_IMMARG:
     return Attribute::ImmArg;
+  case bitc::ATTR_KIND_SANITIZE_MEMTAG:
+    return Attribute::SanitizeMemTag;
   }
 }
 
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index a23b44f475161..5c7b970a3a751 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -723,6 +723,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_Z_EXT;
   case Attribute::ImmArg:
     return bitc::ATTR_KIND_IMMARG;
+  case Attribute::SanitizeMemTag:
+    return bitc::ATTR_KIND_SANITIZE_MEMTAG;
   case Attribute::EndAttrKinds:
     llvm_unreachable("Can not encode end-attribute kinds marker.");
   case Attribute::None:
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 1ba703bb14c76..bb90bcd7dd748 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -283,6 +283,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "sanitize_address";
   if (hasAttribute(Attribute::SanitizeHWAddress))
     return "sanitize_hwaddress";
+  if (hasAttribute(Attribute::SanitizeMemTag))
+    return "sanitize_memtag";
   if (hasAttribute(Attribute::AlwaysInline))
     return "alwaysinline";
   if (hasAttribute(Attribute::ArgMemOnly))
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index cee5bf7dc8dd4..9346c8bda75dd 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1516,6 +1516,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
   case Attribute::ReturnsTwice:
   case Attribute::SanitizeAddress:
   case Attribute::SanitizeHWAddress:
+  case Attribute::SanitizeMemTag:
   case Attribute::SanitizeThread:
   case Attribute::SanitizeMemory:
   case Attribute::MinSize:
diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index cd1fc3798201f..b38cb6d0ed3f9 100644
--- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -57,6 +57,7 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) {
       .Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
       .Case("sanitize_memory", Attribute::SanitizeMemory)
       .Case("sanitize_thread", Attribute::SanitizeThread)
+      .Case("sanitize_memtag", Attribute::SanitizeMemTag)
       .Case("speculative_load_hardening", Attribute::SpeculativeLoadHardening)
       .Case("ssp", Attribute::StackProtect)
       .Case("sspreq", Attribute::StackProtectReq)
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index da137da8f7b15..fa6d3f8ae8738 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -850,6 +850,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
       case Attribute::SanitizeMemory:
       case Attribute::SanitizeThread:
       case Attribute::SanitizeHWAddress:
+      case Attribute::SanitizeMemTag:
       case Attribute::SpeculativeLoadHardening:
       case Attribute::StackProtect:
       case Attribute::StackProtectReq:
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index c6e146791d898..03a98a58ef0ee 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -204,7 +204,7 @@ define void @f34()
 ; CHECK: define void @f34()
 {
   call void @nobuiltin() nobuiltin
-; CHECK: call void @nobuiltin() #39
+; CHECK: call void @nobuiltin() #40
         ret void;
 }
 
@@ -368,6 +368,12 @@ define void @f62() nosync
   ret void
 }
 
+; CHECK: define void @f63() #39
+define void @f63() sanitize_memtag
+{
+  ret void;
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
@@ -407,4 +413,5 @@ define void @f62() nosync
 ; CHECK: attributes #36 = { willreturn }
 ; CHECK: attributes #37 = { nofree }
 ; CHECK: attributes #38 = { nosync }
-; CHECK: attributes #39 = { nobuiltin }
+; CHECK: attributes #39 = { sanitize_memtag }
+; CHECK: attributes #40 = { nobuiltin }
diff --git a/llvm/test/Transforms/Inline/attributes.ll b/llvm/test/Transforms/Inline/attributes.ll
index 028f3b0f19783..81c189a3c7a70 100644
--- a/llvm/test/Transforms/Inline/attributes.ll
+++ b/llvm/test/Transforms/Inline/attributes.ll
@@ -22,6 +22,10 @@ define i32 @sanitize_memory_callee(i32 %i) sanitize_memory {
   ret i32 %i
 }
 
+define i32 @sanitize_memtag_callee(i32 %i) sanitize_memtag {
+  ret i32 %i
+}
+
 define i32 @safestack_callee(i32 %i) safestack {
   ret i32 %i
 }
@@ -50,6 +54,10 @@ define i32 @alwaysinline_sanitize_memory_callee(i32 %i) alwaysinline sanitize_me
   ret i32 %i
 }
 
+define i32 @alwaysinline_sanitize_memtag_callee(i32 %i) alwaysinline sanitize_memtag {
+  ret i32 %i
+}
+
 define i32 @alwaysinline_safestack_callee(i32 %i) alwaysinline safestack {
   ret i32 %i
 }
@@ -104,6 +112,17 @@ define i32 @test_no_sanitize_thread(i32 %arg) {
 ; CHECK-NEXT: ret i32
 }
 
+define i32 @test_no_sanitize_memtag(i32 %arg) {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_memtag_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_memtag_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_no_sanitize_memtag(
+; CHECK-NEXT: @sanitize_memtag_callee
+; CHECK-NEXT: ret i32
+}
+
 
 ; Check that:
 ;  * noattr callee is not inlined into sanitize_(address|memory|thread) caller,
@@ -154,6 +173,17 @@ define i32 @test_sanitize_thread(i32 %arg) sanitize_thread {
 ; CHECK-NEXT: ret i32
 }
 
+define i32 @test_sanitize_memtag(i32 %arg) sanitize_memtag {
+  %x1 = call i32 @noattr_callee(i32 %arg)
+  %x2 = call i32 @sanitize_memtag_callee(i32 %x1)
+  %x3 = call i32 @alwaysinline_callee(i32 %x2)
+  %x4 = call i32 @alwaysinline_sanitize_memtag_callee(i32 %x3)
+  ret i32 %x4
+; CHECK-LABEL: @test_sanitize_memtag(
+; CHECK-NEXT: @noattr_callee
+; CHECK-NEXT: ret i32
+}
+
 define i32 @test_safestack(i32 %arg) safestack {
   %x1 = call i32 @noattr_callee(i32 %arg)
   %x2 = call i32 @safestack_callee(i32 %x1)
diff --git a/llvm/utils/emacs/llvm-mode.el b/llvm/utils/emacs/llvm-mode.el
index c4a1b1f02bfa7..73b02763d0166 100644
--- a/llvm/utils/emacs/llvm-mode.el
+++ b/llvm/utils/emacs/llvm-mode.el
@@ -26,7 +26,7 @@
          "inaccessiblemem_or_argmemonly" "inlinehint" "jumptable" "minsize" "naked" "nobuiltin"
          "noduplicate" "noimplicitfloat" "noinline" "nonlazybind" "noredzone" "noreturn"
          "norecurse" "nounwind" "optnone" "optsize" "readnone" "readonly" "returns_twice"
-         "speculatable" "ssp" "sspreq" "sspstrong" "safestack" "sanitize_address" "sanitize_hwaddress"
+         "speculatable" "ssp" "sspreq" "sspstrong" "safestack" "sanitize_address" "sanitize_hwaddress" "sanitize_memtag"
          "sanitize_thread" "sanitize_memory" "strictfp" "uwtable" "writeonly" "immarg") 'symbols) . font-lock-constant-face)
    ;; Variables
    '("%[-a-zA-Z$._][-a-zA-Z$._0-9]*" . font-lock-variable-name-face)

From dfcd4384cbcac0eeb7e5cbce350f875ba4da79d5 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Mon, 15 Jul 2019 20:06:01 +0000
Subject: [PATCH 156/451] [libc++] Implement P0433: deduction guides for
 <unordered_map>

Thanks to Arthur O'Dwyer for the patch.

Differential Revision: https://reviews.llvm.org/D58590

llvm-svn: 366124
---
 libcxx/include/unordered_map                  | 146 ++++++++++++-
 .../unord.map/unord.map.cnstr/deduct.fail.cpp | 106 +++++++++
 .../unord.map/unord.map.cnstr/deduct.pass.cpp | 204 ++++++++++++++++++
 .../unord.map.cnstr/deduct_const.pass.cpp     | 172 +++++++++++++++
 .../unord.multimap.cnstr/deduct.fail.cpp      | 106 +++++++++
 .../unord.multimap.cnstr/deduct.pass.cpp      | 204 ++++++++++++++++++
 .../deduct_const.pass.cpp                     | 173 +++++++++++++++
 libcxx/www/cxx2a_status.html                  |   2 +-
 8 files changed, 1106 insertions(+), 7 deletions(-)
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.fail.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct_const.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.fail.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct_const.pass.cpp

diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index 4dfe69868e543..63aecc8bc0ef5 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -844,9 +844,9 @@ public:
     // types
     typedef _Key                                           key_type;
     typedef _Tp                                            mapped_type;
-    typedef _Hash                                          hasher;
-    typedef _Pred                                          key_equal;
-    typedef _Alloc                                         allocator_type;
+    typedef typename __identity<_Hash>::type               hasher;
+    typedef typename __identity<_Pred>::type               key_equal;
+    typedef typename __identity<_Alloc>::type              allocator_type;
     typedef pair<const key_type, mapped_type>              value_type;
     typedef value_type&                                    reference;
     typedef const value_type&                              const_reference;
@@ -1348,6 +1348,73 @@ private:
 #endif
 };
 
+#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+template<class _InputIterator,
+         class _Hash = hash<__iter_key_type<_InputIterator>>,
+         class _Pred = equal_to<__iter_key_type<_InputIterator>>,
+         class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
+         class = _EnableIf<!__is_allocator<_Hash>::value>,
+         class = _EnableIf<!is_integral<_Hash>::value>,
+         class = _EnableIf<!__is_allocator<_Pred>::value>,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type = 0,
+              _Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
+  -> unordered_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Hash, _Pred, _Allocator>;
+
+template<class _Key, class _Tp, class _Hash = hash<remove_const_t<_Key>>,
+         class _Pred = equal_to<remove_const_t<_Key>>,
+         class _Allocator = allocator<pair<const _Key, _Tp>>,
+         class = _EnableIf<!__is_allocator<_Hash>::value>,
+         class = _EnableIf<!is_integral<_Hash>::value>,
+         class = _EnableIf<!__is_allocator<_Pred>::value>,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_map(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type = 0,
+              _Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
+  -> unordered_map<remove_const_t<_Key>, _Tp, _Hash, _Pred, _Allocator>;
+
+template<class _InputIterator, class _Allocator,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
+  -> unordered_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
+                   hash<__iter_key_type<_InputIterator>>, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
+
+template<class _InputIterator, class _Allocator,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_map(_InputIterator, _InputIterator, _Allocator)
+  -> unordered_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
+                   hash<__iter_key_type<_InputIterator>>, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
+
+template<class _InputIterator, class _Hash, class _Allocator,
+         class = _EnableIf<!__is_allocator<_Hash>::value>,
+         class = _EnableIf<!is_integral<_Hash>::value>,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_map(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
+  -> unordered_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
+                   _Hash, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
+
+template<class _Key, class _Tp, class _Allocator,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_map(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Allocator)
+  -> unordered_map<remove_const_t<_Key>, _Tp,
+                   hash<remove_const_t<_Key>>,
+                   equal_to<remove_const_t<_Key>>, _Allocator>;
+
+template<class _Key, class _Tp, class _Allocator,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_map(initializer_list<pair<_Key, _Tp>>, _Allocator)
+  -> unordered_map<remove_const_t<_Key>, _Tp,
+                   hash<remove_const_t<_Key>>,
+                   equal_to<remove_const_t<_Key>>, _Allocator>;
+
+template<class _Key, class _Tp, class _Hash, class _Allocator,
+         class = _EnableIf<!__is_allocator<_Hash>::value>,
+         class = _EnableIf<!is_integral<_Hash>::value>,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_map(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
+  -> unordered_map<remove_const_t<_Key>, _Tp, _Hash,
+                   equal_to<remove_const_t<_Key>>, _Allocator>;
+#endif
+
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
         size_type __n, const hasher& __hf, const key_equal& __eql)
@@ -1673,9 +1740,9 @@ public:
     // types
     typedef _Key                                           key_type;
     typedef _Tp                                            mapped_type;
-    typedef _Hash                                          hasher;
-    typedef _Pred                                          key_equal;
-    typedef _Alloc                                         allocator_type;
+    typedef typename __identity<_Hash>::type               hasher;
+    typedef typename __identity<_Pred>::type               key_equal;
+    typedef typename __identity<_Alloc>::type              allocator_type;
     typedef pair<const key_type, mapped_type>              value_type;
     typedef value_type&                                    reference;
     typedef const value_type&                              const_reference;
@@ -2041,6 +2108,73 @@ public:
 
 };
 
+#ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES
+template<class _InputIterator,
+         class _Hash = hash<__iter_key_type<_InputIterator>>,
+         class _Pred = equal_to<__iter_key_type<_InputIterator>>,
+         class _Allocator = allocator<__iter_to_alloc_type<_InputIterator>>,
+         class = _EnableIf<!__is_allocator<_Hash>::value>,
+         class = _EnableIf<!is_integral<_Hash>::value>,
+         class = _EnableIf<!__is_allocator<_Pred>::value>,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type = 0,
+                   _Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
+  -> unordered_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Hash, _Pred, _Allocator>;
+
+template<class _Key, class _Tp, class _Hash = hash<remove_const_t<_Key>>,
+         class _Pred = equal_to<remove_const_t<_Key>>,
+         class _Allocator = allocator<pair<const _Key, _Tp>>,
+         class = _EnableIf<!__is_allocator<_Hash>::value>,
+         class = _EnableIf<!is_integral<_Hash>::value>,
+         class = _EnableIf<!__is_allocator<_Pred>::value>,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_multimap(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type = 0,
+                   _Hash = _Hash(), _Pred = _Pred(), _Allocator = _Allocator())
+  -> unordered_multimap<remove_const_t<_Key>, _Tp, _Hash, _Pred, _Allocator>;
+
+template<class _InputIterator, class _Allocator,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Allocator)
+  -> unordered_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
+                        hash<__iter_key_type<_InputIterator>>, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
+
+template<class _InputIterator, class _Allocator,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_multimap(_InputIterator, _InputIterator, _Allocator)
+  -> unordered_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
+                        hash<__iter_key_type<_InputIterator>>, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
+
+template<class _InputIterator, class _Hash, class _Allocator,
+         class = _EnableIf<!__is_allocator<_Hash>::value>,
+         class = _EnableIf<!is_integral<_Hash>::value>,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_multimap(_InputIterator, _InputIterator, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
+  -> unordered_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>,
+                        _Hash, equal_to<__iter_key_type<_InputIterator>>, _Allocator>;
+
+template<class _Key, class _Tp, class _Allocator,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_multimap(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Allocator)
+  -> unordered_multimap<remove_const_t<_Key>, _Tp,
+                        hash<remove_const_t<_Key>>,
+                        equal_to<remove_const_t<_Key>>, _Allocator>;
+
+template<class _Key, class _Tp, class _Allocator,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_multimap(initializer_list<pair<_Key, _Tp>>, _Allocator)
+  -> unordered_multimap<remove_const_t<_Key>, _Tp,
+                        hash<remove_const_t<_Key>>,
+                        equal_to<remove_const_t<_Key>>, _Allocator>;
+
+template<class _Key, class _Tp, class _Hash, class _Allocator,
+         class = _EnableIf<!__is_allocator<_Hash>::value>,
+         class = _EnableIf<!is_integral<_Hash>::value>,
+         class = _EnableIf<__is_allocator<_Allocator>::value>>
+unordered_multimap(initializer_list<pair<_Key, _Tp>>, typename allocator_traits<_Allocator>::size_type, _Hash, _Allocator)
+  -> unordered_multimap<remove_const_t<_Key>, _Tp, _Hash,
+                        equal_to<remove_const_t<_Key>>, _Allocator>;
+#endif
+
 template <class _Key, class _Tp, class _Hash, class _Pred, class _Alloc>
 unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
         size_type __n, const hasher& __hf, const key_equal& __eql)
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.fail.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.fail.cpp
new file mode 100644
index 0000000000000..642abce8a27bb
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.fail.cpp
@@ -0,0 +1,106 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+// UNSUPPORTED: c++98, c++03, c++11, c++14
+// UNSUPPORTED: libcpp-no-deduction-guides
+// XFAIL: clang-6, apple-clang-9.0, apple-clang-9.1, apple-clang-10.0
+
+// template<class InputIterator,
+//          class Hash = hash<iter-key-type<InputIterator>>,
+//          class Pred = equal_to<iter-key-type<InputIterator>>,
+//          class Allocator = allocator<iter-to-alloc-type<InputIterator>>>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type = see below,
+//               Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash, Pred,
+//                    Allocator>;
+//
+// template<class Key, class T, class Hash = hash<Key>,
+//          class Pred = equal_to<Key>, class Allocator = allocator<pair<const Key, T>>>
+// unordered_map(initializer_list<pair<Key, T>>,
+//               typename see below::size_type = see below, Hash = Hash(),
+//               Pred = Pred(), Allocator = Allocator())
+//   -> unordered_map<Key, T, Hash, Pred, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                    hash<iter-key-type<InputIterator>>,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_map(InputIterator, InputIterator, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                    hash<iter-key-type<InputIterator>>,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Hash, class Allocator>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, typename see below::size_type, Allocator)
+//   -> unordered_map<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, Allocator)
+//   -> unordered_map<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Hash, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, typename see below::size_type, Hash,
+//               Allocator)
+//   -> unordered_map<Key, T, Hash, equal_to<Key>, Allocator>;
+
+#include <functional>
+#include <unordered_map>
+
+int main(int, char**)
+{
+    using P = std::pair<const int, int>;
+    {
+        // cannot deduce Key from nothing
+        std::unordered_map m; // expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}}
+    }
+    {
+        // cannot deduce Key from just (Size)
+        std::unordered_map m(42); // expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Hash)
+        std::unordered_map m(42, std::hash<int>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Hash, Pred)
+        std::unordered_map m(42, std::hash<int>(), std::equal_to<int>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Hash, Pred, Allocator)
+        std::unordered_map m(42, std::hash<int>(), std::equal_to<int>(), std::allocator<P>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}}
+    }
+    {
+        // cannot deduce Key from just (Allocator)
+        std::unordered_map m(std::allocator<P>{});
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Allocator)
+        std::unordered_map m(42, std::allocator<P>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Hash, Allocator)
+        std::unordered_map m(42, std::hash<int>(), std::allocator<P>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}}
+    }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp
new file mode 100644
index 0000000000000..0923597dcc994
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp
@@ -0,0 +1,204 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+// UNSUPPORTED: c++98, c++03, c++11, c++14
+// UNSUPPORTED: libcpp-no-deduction-guides
+
+// template<class InputIterator,
+//          class Hash = hash<iter-key-type<InputIterator>>,
+//          class Pred = equal_to<iter-key-type<InputIterator>>,
+//          class Allocator = allocator<iter-to-alloc-type<InputIterator>>>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type = see below,
+//               Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash, Pred,
+//                    Allocator>;
+//
+// template<class Key, class T, class Hash = hash<Key>,
+//          class Pred = equal_to<Key>, class Allocator = allocator<pair<const Key, T>>>
+// unordered_map(initializer_list<pair<Key, T>>,
+//               typename see below::size_type = see below, Hash = Hash(),
+//               Pred = Pred(), Allocator = Allocator())
+//   -> unordered_map<Key, T, Hash, Pred, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                    hash<iter-key-type<InputIterator>>,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_map(InputIterator, InputIterator, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                    hash<iter-key-type<InputIterator>>,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Hash, class Allocator>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, typename see below::size_type, Allocator)
+//   -> unordered_map<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, Allocator)
+//   -> unordered_map<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Hash, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, typename see below::size_type, Hash,
+//               Allocator)
+//   -> unordered_map<Key, T, Hash, equal_to<Key>, Allocator>;
+
+#include <algorithm> // is_permutation
+#include <cassert>
+#include <climits> // INT_MAX
+#include <type_traits>
+#include <unordered_map>
+
+#include "test_allocator.h"
+
+using P = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+int main(int, char**)
+{
+    const PC expected_m[] = { {1,1}, {2,2}, {3,1}, {INT_MAX,1} };
+
+    {
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42);
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash<short>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<int>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash<short>(), std::equal_to<>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash<short>(), std::equal_to<>(), test_allocator<PC>(0, 41));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 41);
+    }
+
+    {
+    std::unordered_map<int, long> source;
+    std::unordered_map m(source);
+    ASSERT_SAME_TYPE(decltype(m), decltype(source));
+    assert(m.size() == 0);
+    }
+
+    {
+    std::unordered_map<int, long> source;
+    std::unordered_map m{source};  // braces instead of parens
+    ASSERT_SAME_TYPE(decltype(m), decltype(source));
+    assert(m.size() == 0);
+    }
+
+    {
+    std::unordered_map<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>> source;
+    test_allocator<PC> a(0, 42);
+    std::unordered_map m(source, a);
+    ASSERT_SAME_TYPE(decltype(m), decltype(source));
+    assert(m.get_allocator().get_id() == 42);
+    assert(m.size() == 0);
+    }
+
+    {
+    std::unordered_map<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>> source;
+    test_allocator<PC> a(0, 43);
+    std::unordered_map m{source, a};  // braces instead of parens
+    ASSERT_SAME_TYPE(decltype(m), decltype(source));
+    assert(m.get_allocator().get_id() == 43);
+    assert(m.size() == 0);
+    }
+
+    {
+    std::unordered_map m { P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} };
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42);
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash<short>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash<short>(), std::equal_to<>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash<short>(), std::equal_to<>(), test_allocator<PC>(0, 44));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 44);
+    }
+
+    {
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, test_allocator<PC>(0, 45));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<int>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 45);
+    }
+
+    {
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash<short>(), test_allocator<PC>(0, 46));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 46);
+    }
+
+    {
+    std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, test_allocator<PC>(0, 47));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<int>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 47);
+    }
+
+    {
+    std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash<short>(), test_allocator<PC>(0, 48));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 48);
+    }
+
+    return 0;
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct_const.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct_const.pass.cpp
new file mode 100644
index 0000000000000..1fb4d674d5fb0
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct_const.pass.cpp
@@ -0,0 +1,172 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+// UNSUPPORTED: c++98, c++03, c++11, c++14
+// UNSUPPORTED: libcpp-no-deduction-guides
+
+// template<class InputIterator,
+//          class Hash = hash<iter-key-type<InputIterator>>,
+//          class Pred = equal_to<iter-key-type<InputIterator>>,
+//          class Allocator = allocator<iter-to-alloc-type<InputIterator>>>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type = see below,
+//               Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash, Pred,
+//                    Allocator>;
+//
+// template<class Key, class T, class Hash = hash<Key>,
+//          class Pred = equal_to<Key>, class Allocator = allocator<pair<const Key, T>>>
+// unordered_map(initializer_list<pair<Key, T>>,
+//               typename see below::size_type = see below, Hash = Hash(),
+//               Pred = Pred(), Allocator = Allocator())
+//   -> unordered_map<Key, T, Hash, Pred, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                    hash<iter-key-type<InputIterator>>,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_map(InputIterator, InputIterator, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                    hash<iter-key-type<InputIterator>>,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Hash, class Allocator>
+// unordered_map(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator)
+//   -> unordered_map<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash,
+//                    equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, typename see below::size_type, Allocator)
+//   -> unordered_map<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, Allocator)
+//   -> unordered_map<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Hash, class Allocator>
+// unordered_map(initializer_list<pair<Key, T>>, typename see below::size_type, Hash,
+//               Allocator)
+//   -> unordered_map<Key, T, Hash, equal_to<Key>, Allocator>;
+
+#include <algorithm> // std::is_permutation
+#include <cassert>
+#include <climits> // INT_MAX
+#include <type_traits>
+#include <unordered_map>
+
+#include "test_allocator.h"
+
+using P = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+int main(int, char**)
+{
+    const PC expected_m[] = { {1,1L}, {2,2L}, {3,1L}, {INT_MAX,1L} };
+
+    {
+    const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} };
+    std::unordered_map m(std::begin(arr), std::end(arr));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42);
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash<short>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<int>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash<short>(), std::equal_to<>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash<short>(), std::equal_to<>(), test_allocator<PC>(0, 41));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 41);
+    }
+
+    {
+    std::unordered_map m { PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} };
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42);
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash<short>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash<short>(), std::equal_to<>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    {
+    std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash<short>(), std::equal_to<>(), test_allocator<PC>(0, 44));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 44);
+    }
+
+    {
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, test_allocator<PC>(0, 45));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<int>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 45);
+    }
+
+    {
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash<short>(), test_allocator<PC>(0, 46));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 46);
+    }
+
+    {
+    std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, test_allocator<PC>(0, 47));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<int>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 47);
+    }
+
+    {
+    std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash<short>(), test_allocator<PC>(0, 48));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_map<int, long, std::hash<short>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 48);
+    }
+
+    return 0;
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.fail.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.fail.cpp
new file mode 100644
index 0000000000000..7f170472d7a52
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.fail.cpp
@@ -0,0 +1,106 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+// UNSUPPORTED: c++98, c++03, c++11, c++14
+// UNSUPPORTED: libcpp-no-deduction-guides
+// XFAIL: clang-6, apple-clang-9.0, apple-clang-9.1, apple-clang-10.0
+
+// template<class InputIterator,
+//          class Hash = hash<iter-key-type<InputIterator>>,
+//          class Pred = equal_to<iter-key-type<InputIterator>>,
+//          class Allocator = allocator<iter-to-alloc-type<InputIterator>>>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type = see below,
+//                    Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash, Pred,
+//                         Allocator>;
+//
+// template<class Key, class T, class Hash = hash<Key>,
+//          class Pred = equal_to<Key>, class Allocator = allocator<pair<const Key, T>>>
+// unordered_multimap(initializer_list<pair<Key, T>>,
+//                    typename see below::size_type = see below, Hash = Hash(),
+//                    Pred = Pred(), Allocator = Allocator())
+//   -> unordered_multimap<Key, T, Hash, Pred, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                         hash<iter-key-type<InputIterator>>,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                         hash<iter-key-type<InputIterator>>,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Hash, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, typename see below::size_type, Allocator)
+//   -> unordered_multimap<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, Allocator)
+//   -> unordered_multimap<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Hash, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, typename see below::size_type, Hash,
+//                    Allocator)
+//   -> unordered_multimap<Key, T, Hash, equal_to<Key>, Allocator>;
+
+#include <functional>
+#include <unordered_map>
+
+int main(int, char**)
+{
+    using P = std::pair<const int, int>;
+    {
+        // cannot deduce Key from nothing
+        std::unordered_multimap m; // expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}}
+    }
+    {
+        // cannot deduce Key from just (Size)
+        std::unordered_multimap m(42); // expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Hash)
+        std::unordered_multimap m(42, std::hash<int>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Hash, Pred)
+        std::unordered_multimap m(42, std::hash<int>(), std::equal_to<int>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Hash, Pred, Allocator)
+        std::unordered_multimap m(42, std::hash<int>(), std::equal_to<int>(), std::allocator<P>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}}
+    }
+    {
+        // cannot deduce Key from just (Allocator)
+        std::unordered_multimap m(std::allocator<P>{});
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Allocator)
+        std::unordered_multimap m(42, std::allocator<P>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}}
+    }
+    {
+        // cannot deduce Key from just (Size, Hash, Allocator)
+        std::unordered_multimap m(42, std::hash<int>(), std::allocator<P>());
+            // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}}
+    }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.pass.cpp
new file mode 100644
index 0000000000000..f620f1e37112c
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.pass.cpp
@@ -0,0 +1,204 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+// UNSUPPORTED: c++98, c++03, c++11, c++14
+// UNSUPPORTED: libcpp-no-deduction-guides
+
+// template<class InputIterator,
+//          class Hash = hash<iter-key-type<InputIterator>>,
+//          class Pred = equal_to<iter-key-type<InputIterator>>,
+//          class Allocator = allocator<iter-to-alloc-type<InputIterator>>>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type = see below,
+//                    Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash, Pred,
+//                         Allocator>;
+//
+// template<class Key, class T, class Hash = hash<Key>,
+//          class Pred = equal_to<Key>, class Allocator = allocator<pair<const Key, T>>>
+// unordered_multimap(initializer_list<pair<Key, T>>,
+//                    typename see below::size_type = see below, Hash = Hash(),
+//                    Pred = Pred(), Allocator = Allocator())
+//   -> unordered_multimap<Key, T, Hash, Pred, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                         hash<iter-key-type<InputIterator>>,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                         hash<iter-key-type<InputIterator>>,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Hash, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, typename see below::size_type, Allocator)
+//   -> unordered_multimap<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, Allocator)
+//   -> unordered_multimap<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Hash, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, typename see below::size_type, Hash,
+//                    Allocator)
+//   -> unordered_multimap<Key, T, Hash, equal_to<Key>, Allocator>;
+
+#include <algorithm> // is_permutation
+#include <cassert>
+#include <climits> // INT_MAX
+#include <type_traits>
+#include <unordered_map>
+
+#include "test_allocator.h"
+
+using P = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+int main(int, char**)
+{ // Exercises class template argument deduction for std::unordered_multimap; each scope below covers one constructor form.
+    const PC expected_m[] = { {1,1}, {1,1}, {2,2}, {3,1}, {INT_MAX,1} }; // contents every populated map must hold; compared order-independently via is_permutation
+
+    { // (InputIterator, InputIterator): Hash, Pred and Allocator all defaulted
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (InputIterator, InputIterator, size_type)
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42);
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (InputIterator, InputIterator, size_type, Hash): Pred is expected to default to equal_to<int>
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash<short>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<int>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (InputIterator, InputIterator, size_type, Hash, Pred): the transparent equal_to<> must be deduced as given
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash<short>(), std::equal_to<>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (InputIterator, InputIterator, size_type, Hash, Pred, Allocator): also checks the allocator id (41) reaches the container
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash<short>(), std::equal_to<>(), test_allocator<PC>(0, 41));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 41);
+    }
+
+    { // construction from an existing map with parentheses: the deduced type must equal the source's type
+    std::unordered_multimap<int, long> source;
+    std::unordered_multimap m(source);
+    ASSERT_SAME_TYPE(decltype(m), decltype(source));
+    assert(m.size() == 0);
+    }
+
+    { // same as the previous scope, but with brace initialization
+    std::unordered_multimap<int, long> source;
+    std::unordered_multimap m{source};  // braces instead of parens
+    ASSERT_SAME_TYPE(decltype(m), decltype(source));
+    assert(m.size() == 0);
+    }
+
+    { // (source map, Allocator) with parentheses: type comes from the source, allocator id (42) from `a`
+    std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>> source;
+    test_allocator<PC> a(0, 42);
+    std::unordered_multimap m(source, a);
+    ASSERT_SAME_TYPE(decltype(m), decltype(source));
+    assert(m.get_allocator().get_id() == 42);
+    assert(m.size() == 0);
+    }
+
+    { // (source map, Allocator) with braces: type comes from the source, allocator id (43) from `a`
+    std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>> source;
+    test_allocator<PC> a(0, 43);
+    std::unordered_multimap m{source, a};  // braces instead of parens
+    ASSERT_SAME_TYPE(decltype(m), decltype(source));
+    assert(m.get_allocator().get_id() == 43);
+    assert(m.size() == 0);
+    }
+
+    { // braced list of pair<int, long> elements: Hash, Pred and Allocator all defaulted
+    std::unordered_multimap m { P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} };
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (initializer_list, size_type)
+    std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42);
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (initializer_list, size_type, Hash)
+    std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash<short>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (initializer_list, size_type, Hash, Pred)
+    std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash<short>(), std::equal_to<>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (initializer_list, size_type, Hash, Pred, Allocator): also checks the allocator id (44) reaches the container
+    std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash<short>(), std::equal_to<>(), test_allocator<PC>(0, 44));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 44);
+    }
+
+    { // (InputIterator, InputIterator, size_type, Allocator): Hash and Pred are expected to be hash<int> / equal_to<int>
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, test_allocator<PC>(0, 45));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<int>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 45);
+    }
+
+    { // (InputIterator, InputIterator, size_type, Hash, Allocator): Pred is expected to be equal_to<int>
+    const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash<short>(), test_allocator<PC>(0, 46));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 46);
+    }
+
+    { // (initializer_list, size_type, Allocator): Hash and Pred are expected to be hash<int> / equal_to<int>
+    std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, test_allocator<PC>(0, 47));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<int>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 47);
+    }
+
+    { // (initializer_list, size_type, Hash, Allocator): Pred is expected to be equal_to<int>
+    std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash<short>(), test_allocator<PC>(0, 48));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 48);
+    }
+
+    return 0;
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct_const.pass.cpp
new file mode 100644
index 0000000000000..8a4a383641173
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct_const.pass.cpp
@@ -0,0 +1,173 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+// UNSUPPORTED: c++98, c++03, c++11, c++14
+// UNSUPPORTED: libcpp-no-deduction-guides
+
+// template<class InputIterator,
+//          class Hash = hash<iter-key-type<InputIterator>>,
+//          class Pred = equal_to<iter-key-type<InputIterator>>,
+//          class Allocator = allocator<iter-to-alloc-type<InputIterator>>>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type = see below,
+//                    Hash = Hash(), Pred = Pred(), Allocator = Allocator())
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash, Pred,
+//                         Allocator>;
+//
+// template<class Key, class T, class Hash = hash<Key>,
+//          class Pred = equal_to<Key>, class Allocator = allocator<pair<const Key, T>>>
+// unordered_multimap(initializer_list<pair<Key, T>>,
+//                    typename see below::size_type = see below, Hash = Hash(),
+//                    Pred = Pred(), Allocator = Allocator())
+//   -> unordered_multimap<Key, T, Hash, Pred, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                         hash<iter-key-type<InputIterator>>,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>,
+//                         hash<iter-key-type<InputIterator>>,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class InputIterator, class Hash, class Allocator>
+// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator)
+//   -> unordered_multimap<iter-key-type<InputIterator>, iter-mapped-type<InputIterator>, Hash,
+//                         equal_to<iter-key-type<InputIterator>>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, typename see below::size_type, Allocator)
+//   -> unordered_multimap<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, Allocator)
+//   -> unordered_multimap<Key, T, hash<Key>, equal_to<Key>, Allocator>;
+//
+// template<class Key, class T, class Hash, class Allocator>
+// unordered_multimap(initializer_list<pair<Key, T>>, typename see below::size_type, Hash,
+//                    Allocator)
+//   -> unordered_multimap<Key, T, Hash, equal_to<Key>, Allocator>;
+
+#include <algorithm> // is_permutation
+#include <cassert>
+#include <climits> // INT_MAX
+#include <functional>
+#include <type_traits>
+#include <unordered_map>
+
+#include "test_allocator.h"
+
+using P = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+int main(int, char**)
+{ // Exercises class template argument deduction for std::unordered_multimap from pair<const int, long> elements; the deduced key type must still be non-const int.
+    const PC expected_m[] = { {1,1}, {1,1}, {2,2}, {3,1}, {INT_MAX,1} }; // contents every map must hold; compared order-independently via is_permutation
+
+    { // (InputIterator, InputIterator): Hash, Pred and Allocator all defaulted
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (InputIterator, InputIterator, size_type)
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42);
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (InputIterator, InputIterator, size_type, Hash): Pred is expected to default to equal_to<int>
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash<short>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<int>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (InputIterator, InputIterator, size_type, Hash, Pred): the transparent equal_to<> must be deduced as given
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash<short>(), std::equal_to<>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (InputIterator, InputIterator, size_type, Hash, Pred, Allocator): also checks the allocator id (41) reaches the container
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash<short>(), std::equal_to<>(), test_allocator<PC>(0, 41));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 41);
+    }
+
+    { // braced list of pair<const int, long> elements: Hash, Pred and Allocator all defaulted
+    std::unordered_multimap m { PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} };
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (initializer_list, size_type)
+    std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42);
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (initializer_list, size_type, Hash)
+    std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash<short>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (initializer_list, size_type, Hash, Pred)
+    std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash<short>(), std::equal_to<>());
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    }
+
+    { // (initializer_list, size_type, Hash, Pred, Allocator): also checks the allocator id (44) reaches the container
+    std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash<short>(), std::equal_to<>(), test_allocator<PC>(0, 44));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 44);
+    }
+
+    { // (InputIterator, InputIterator, size_type, Allocator): Hash and Pred are expected to be hash<int> / equal_to<int>
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, test_allocator<PC>(0, 45));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<int>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 45);
+    }
+
+    { // (InputIterator, InputIterator, size_type, Hash, Allocator): Pred is expected to be equal_to<int>
+    const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} };
+    std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash<short>(), test_allocator<PC>(0, 46));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 46);
+    }
+
+    { // (initializer_list, size_type, Allocator): Hash and Pred are expected to be hash<int> / equal_to<int>
+    std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, test_allocator<PC>(0, 47));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<int>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 47);
+    }
+
+    { // (initializer_list, size_type, Hash, Allocator): Pred is expected to be equal_to<int>
+    std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash<short>(), test_allocator<PC>(0, 48));
+    ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap<int, long, std::hash<short>, std::equal_to<int>, test_allocator<PC>>);
+    assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m)));
+    assert(m.get_allocator().get_id() == 48);
+    }
+
+    return 0;
+}
diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html
index b800ef07bfad9..2b19eb86ba332 100644
--- a/libcxx/www/cxx2a_status.html
+++ b/libcxx/www/cxx2a_status.html
@@ -283,7 +283,7 @@ <h3>Library Working group Issues Status</h3>
 	<tr><td><a href="https://wg21.link/LWG2996">2996</a></td><td>Missing rvalue overloads for <tt>shared_ptr</tt> operations</td><td>San Diego</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG3008">3008</a></td><td><tt>make_shared</tt> (sub)object destruction semantics are not specified</td><td>San Diego</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG3022">3022</a></td><td><tt>is_convertible&lt;derived*, base*&gt;</tt> may lead to ODR</td><td>San Diego</td><td>Resolved by 1285R0</td></tr>
-	<tr><td><a href="https://wg21.link/LWG3025">3025</a></td><td>Map-like container deduction guides should use <tt>pair&lt;Key, T&gt;</tt>, not <tt>pair&lt;const Key, T&gt;</tt></td><td>San Diego</td><td></td></tr>
+	<tr><td><a href="https://wg21.link/LWG3025">3025</a></td><td>Map-like container deduction guides should use <tt>pair&lt;Key, T&gt;</tt>, not <tt>pair&lt;const Key, T&gt;</tt></td><td>San Diego</td><td>Complete</td></tr>
 	<tr><td><a href="https://wg21.link/LWG3031">3031</a></td><td>Algorithms and predicates with non-const reference arguments</td><td>San Diego</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG3037">3037</a></td><td><tt>polymorphic_allocator</tt> and incomplete types</td><td>San Diego</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/LWG3038">3038</a></td><td><tt>polymorphic_allocator::allocate</tt> should not allow integer overflow to create vulnerabilities</td><td>San Diego</td><td></td></tr>

From 66ee934440c21dc2bd6ff938c79dad4ce032990a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 20:20:18 +0000
Subject: [PATCH 157/451] AMDGPU/GlobalISel: Allow scalar s1 and/or/xor

If a 1-bit value is in a 32-bit VGPR, the scalar opcodes set SCC to
whether the result is 0. If the inputs are SCC, these can be copied to
a 32-bit SGPR to produce an SCC result.

llvm-svn: 366125
---
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   97 +-
 .../GlobalISel/regbankselect-and-s1.mir       |  527 +++++++
 .../AMDGPU/GlobalISel/regbankselect-and.mir   |  145 +-
 .../AMDGPU/GlobalISel/regbankselect-or.mir    |   15 +-
 .../GlobalISel/regbankselect-phi-s1.mir       | 1333 +++++++++++++++++
 .../AMDGPU/GlobalISel/regbankselect-xor.mir   |   15 +-
 6 files changed, 1964 insertions(+), 168 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index be05d9cb0ec6b..815cbc5e26ee4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -108,14 +108,22 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
 unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
                                           const RegisterBank &Src,
                                           unsigned Size) const {
+  // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane?
   if (Dst.getID() == AMDGPU::SGPRRegBankID &&
       Src.getID() == AMDGPU::VGPRRegBankID) {
     return std::numeric_limits<unsigned>::max();
   }
 
-  // SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by
-  // the valu.
-  if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID &&
+  // Bool values are tricky, because the meaning is based on context. The SCC
+  // and VCC banks are for the natural scalar and vector conditions produced by
+  // a compare.
+  //
+  // Legalization doesn't know about the necessary context, so an s1 use may
+  // have been a truncate from an arbitrary value, in which case a copy (lowered
+  // as a compare with 0) needs to be inserted.
+  if (Size == 1 &&
+      (Dst.getID() == AMDGPU::SCCRegBankID ||
+       Dst.getID() == AMDGPU::SGPRRegBankID) &&
       (Src.getID() == AMDGPU::SGPRRegBankID ||
        Src.getID() == AMDGPU::VGPRRegBankID ||
        Src.getID() == AMDGPU::VCCRegBankID))
@@ -333,6 +341,35 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
   case TargetOpcode::G_OR:
   case TargetOpcode::G_XOR: {
     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+
+    if (Size == 1) {
+      // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0.
+      const InstructionMapping &SCCMapping = getInstructionMapping(
+        1, 1, getOperandsMapping(
+          {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+        3); // Num Operands
+      AltMappings.push_back(&SCCMapping);
+
+      const InstructionMapping &SGPRMapping = getInstructionMapping(
+        1, 1, getOperandsMapping(
+          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
+        3); // Num Operands
+      AltMappings.push_back(&SGPRMapping);
+
+      const InstructionMapping &VCCMapping0 = getInstructionMapping(
+        2, 10, getOperandsMapping(
+          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
+              AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
+              AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
+        3); // Num Operands
+      AltMappings.push_back(&VCCMapping0);
+      return AltMappings;
+    }
+
     if (Size != 64)
       break;
 
@@ -360,7 +397,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
       3); // Num Operands
     AltMappings.push_back(&SVMapping);
 
-    // SGPR in LHS is slightly preferrable, so make it VS more expnesive than
+    // SGPR in LHS is slightly preferable, so make VS more expensive than
     // SV.
     const InstructionMapping &VSMapping = getInstructionMapping(
       3, 4, getOperandsMapping(
@@ -1551,8 +1588,56 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case AMDGPU::G_XOR: {
     unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
     if (Size == 1) {
-      OpdsMapping[0] = OpdsMapping[1] =
-        OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
+      const RegisterBank *DstBank
+        = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI);
+
+      unsigned TargetBankID = -1;
+      unsigned BankLHS = -1;
+      unsigned BankRHS = -1;
+      if (DstBank) {
+        TargetBankID = DstBank->getID();
+        if (DstBank == &AMDGPU::VCCRegBank) {
+          TargetBankID = AMDGPU::VCCRegBankID;
+          BankLHS = AMDGPU::VCCRegBankID;
+          BankRHS = AMDGPU::VCCRegBankID;
+        } else if (DstBank == &AMDGPU::SCCRegBank) {
+          TargetBankID = AMDGPU::SCCRegBankID;
+          BankLHS = AMDGPU::SGPRRegBankID;
+          BankRHS = AMDGPU::SGPRRegBankID;
+        } else {
+          BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+                                 AMDGPU::SGPRRegBankID);
+          BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+                                 AMDGPU::SGPRRegBankID);
+        }
+      } else {
+        BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI,
+                               AMDGPU::VCCRegBankID);
+        BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+                               AMDGPU::VCCRegBankID);
+
+        // Both inputs should be true booleans to produce a boolean result.
+        if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
+          TargetBankID = AMDGPU::VGPRRegBankID;
+        } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
+          TargetBankID = AMDGPU::VCCRegBankID;
+          BankLHS = AMDGPU::VCCRegBankID;
+          BankRHS = AMDGPU::VCCRegBankID;
+        } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
+          TargetBankID = AMDGPU::SGPRRegBankID;
+        } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) {
+          // The operation must be done on a 32-bit register, but it will set
+          // scc. The result type could interchangeably be SCC or SGPR, since
+          // both values will be produced.
+          TargetBankID = AMDGPU::SCCRegBankID;
+          BankLHS = AMDGPU::SGPRRegBankID;
+          BankRHS = AMDGPU::SGPRRegBankID;
+        }
+      }
+
+      OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
+      OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
+      OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
       break;
     }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir
new file mode 100644
index 0000000000000..620a8e1e71bdc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir
@@ -0,0 +1,527 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=FAST %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=GREEDY %s
+
+---
+name: and_s1_sgpr_sgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_sgpr_sgpr
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GREEDY-LABEL: name: and_s1_sgpr_sgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:_(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_scc_scc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_scc_scc
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; FAST: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; FAST: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1)
+    ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[COPY3]]
+    ; GREEDY-LABEL: name: and_s1_scc_scc
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1)
+    ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[COPY3]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(eq), %0, %2
+    %4:_(s1) = G_ICMP intpred(eq), %1, %2
+    %5:_(s1) = G_AND %3, %4
+...
+
+---
+name: and_s1_vgpr_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; FAST-LABEL: name: and_s1_vgpr_vgpr
+    ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GREEDY-LABEL: name: and_s1_vgpr_vgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:_(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_vcc_vcc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; FAST-LABEL: name: and_s1_vcc_vcc
+    ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; FAST: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
+    ; GREEDY-LABEL: name: and_s1_vcc_vcc
+    ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; GREEDY: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(eq), %0, %2
+    %4:_(s1) = G_ICMP intpred(eq), %1, %2
+    %5:_(s1) = G_AND %3, %4
+...
+
+---
+name: and_s1_sgpr_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; FAST-LABEL: name: and_s1_sgpr_vgpr
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GREEDY-LABEL: name: and_s1_sgpr_vgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:_(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_vgpr_sgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+    ; FAST-LABEL: name: and_s1_vgpr_sgpr
+    ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GREEDY-LABEL: name: and_s1_vgpr_sgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:_(s1) = G_AND %2, %3
+...
+
+# FIXME: Should just change the result bank of the scc compare.
+---
+name: and_s1_scc_vcc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; FAST-LABEL: name: and_s1_scc_vcc
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; FAST: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
+    ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP1]]
+    ; GREEDY-LABEL: name: and_s1_scc_vcc
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; GREEDY: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
+    ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(eq), %0, %2
+    %4:_(s1) = G_ICMP intpred(eq), %1, %2
+    %5:_(s1) = G_AND %3, %4
+...
+
+---
+name: and_s1_vcc_scc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; FAST-LABEL: name: and_s1_vcc_scc
+    ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; FAST: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
+    ; GREEDY-LABEL: name: and_s1_vcc_scc
+    ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; GREEDY: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(eq), %0, %2
+    %4:_(s1) = G_ICMP intpred(eq), %1, %2
+    %5:_(s1) = G_AND %3, %4
+...
+
+
+# Test with a known result bank
+---
+name: and_s1_vcc_sgpr_sgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_vcc_sgpr_sgpr
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
+    ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
+    ; GREEDY-LABEL: name: and_s1_vcc_sgpr_sgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[AND]](s1)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:vcc(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_vcc_vgpr_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; FAST-LABEL: name: and_s1_vcc_vgpr_vgpr
+    ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
+    ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
+    ; GREEDY-LABEL: name: and_s1_vcc_vgpr_vgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
+    ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:vcc(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_vcc_vgpr_sgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+    ; FAST-LABEL: name: and_s1_vcc_vgpr_sgpr
+    ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
+    ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
+    ; GREEDY-LABEL: name: and_s1_vcc_vgpr_sgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
+    ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:vcc(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_vcc_sgpr_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+    ; FAST-LABEL: name: and_s1_vcc_sgpr_vgpr
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
+    ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
+    ; GREEDY-LABEL: name: and_s1_vcc_sgpr_vgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
+    ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:vcc(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_vgpr_sgpr_sgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_vgpr_sgpr_sgpr
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GREEDY-LABEL: name: and_s1_vgpr_sgpr_sgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:vgpr(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_sgpr_sgpr_sgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_sgpr_sgpr_sgpr
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GREEDY-LABEL: name: and_s1_sgpr_sgpr_sgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:sgpr(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_scc_sgpr_sgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_scc_sgpr_sgpr
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GREEDY-LABEL: name: and_s1_scc_sgpr_sgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[TRUNC1]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s1) = G_TRUNC %0
+    %3:_(s1) = G_TRUNC %1
+    %4:scc(s1) = G_AND %2, %3
+...
+
+---
+name: and_s1_scc_scc_scc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_scc_scc_scc
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
+    ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
+    ; FAST: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; FAST: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1)
+    ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[COPY3]]
+    ; GREEDY-LABEL: name: and_s1_scc_scc_scc
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
+    ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
+    ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1)
+    ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[COPY3]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(ne), %0, %2
+    %4:_(s1) = G_ICMP intpred(ne), %1, %2
+    %5:scc(s1) = G_AND %3, %4
+
+...
+
+---
+name: and_s1_scc_sgpr_scc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_scc_sgpr_scc
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
+    ; FAST: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[COPY2]]
+    ; GREEDY-LABEL: name: and_s1_scc_sgpr_scc
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
+    ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[COPY2]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_TRUNC %0
+    %4:_(s1) = G_ICMP intpred(ne), %1, %2
+    %5:scc(s1) = G_AND %3, %4
+
+...
+
+---
+name: and_s1_scc_scc_sgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; FAST-LABEL: name: and_s1_scc_scc_sgpr
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
+    ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; FAST: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[TRUNC]]
+    ; GREEDY-LABEL: name: and_s1_scc_scc_sgpr
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
+    ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+    ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[TRUNC]]
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_CONSTANT i32 0
+    %3:_(s1) = G_ICMP intpred(ne), %0, %2
+    %4:_(s1) = G_TRUNC %1
+    %5:scc(s1) = G_AND %3, %4
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
index 1d05930295c45..fbfadad6c55d2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir
@@ -67,150 +67,6 @@ body: |
     %2:_(s32) = G_AND %0, %1
 ...
 
----
-name: and_i1_scc_scc
-legalized: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $sgpr1
-    ; CHECK-LABEL: name: and_i1_scc_scc
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP1]](s1)
-    ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
-    ; CHECK: S_NOP 0, implicit [[AND]](s1)
-      %0:_(s32) = COPY $sgpr0
-      %1:_(s32) = COPY $sgpr1
-      %2:_(s32) = G_CONSTANT i32 0
-      %4:_(s1) = G_ICMP intpred(ne), %0, %2
-      %5:_(s1) = G_ICMP intpred(ne), %1, %2
-      %6:_(s1) = G_AND %4, %5
-      S_NOP 0, implicit %6
-...
-
----
-name: and_i1_vcc_vcc
-legalized: true
-
-body: |
-  bb.0:
-    liveins: $vgpr0, $vgpr1
-    ; CHECK-LABEL: name: and_i1_vcc_vcc
-    ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
-    ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
-    ; CHECK: S_NOP 0, implicit [[AND]](s1)
-      %0:_(s32) = COPY $vgpr0
-      %1:_(s32) = COPY $vgpr1
-      %2:_(s32) = G_CONSTANT i32 0
-      %4:_(s1) = G_ICMP intpred(ne), %0, %2
-      %5:_(s1) = G_ICMP intpred(ne), %1, %2
-      %6:_(s1) = G_AND %4, %5
-      S_NOP 0, implicit %6
-...
-
----
-name: and_i1_scc_vcc
-legalized: true
-
-body: |
-  bb.0:
-    liveins: $sgpr0, $vgpr0
-    ; CHECK-LABEL: name: and_i1_scc_vcc
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
-    ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP1]]
-    ; CHECK: S_NOP 0, implicit [[AND]](s1)
-      %0:_(s32) = COPY $sgpr0
-      %1:_(s32) = COPY $vgpr0
-      %2:_(s32) = G_CONSTANT i32 0
-      %4:_(s1) = G_ICMP intpred(ne), %0, %2
-      %5:_(s1) = G_ICMP intpred(ne), %1, %2
-      %6:_(s1) = G_AND %4, %5
-      S_NOP 0, implicit %6
-...
-
----
-name: and_i1_sgpr_trunc_sgpr_trunc
-legalized: true
-body:             |
-  bb.0.entry:
-    liveins: $sgpr0, $sgpr1
-    ; CHECK-LABEL: name: and_i1_sgpr_trunc_sgpr_trunc
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
-    ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
-    ; CHECK: S_NOP 0, implicit [[AND]](s1)
-    %0:_(s32) = COPY $sgpr0
-    %1:_(s32) = COPY $sgpr1
-    %2:_(s1) = G_TRUNC %0
-    %3:_(s1) = G_TRUNC %1
-    %4:_(s1) = G_AND %2, %3
-    S_NOP 0, implicit %4
-
-...
-
----
-name: and_i1_trunc_scc
-legalized: true
-body:             |
-  bb.0.entry:
-    liveins: $sgpr0, $sgpr1
-    ; CHECK-LABEL: name: and_i1_trunc_scc
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
-    ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]]
-    ; CHECK: S_NOP 0, implicit [[AND]](s1)
-    %0:_(s32) = COPY $sgpr0
-    %1:_(s32) = COPY $sgpr1
-    %2:_(s1) = G_TRUNC %0
-    %3:_(s1) = G_ICMP intpred(ne), %0, %1
-    %4:_(s1) = G_AND %2, %3
-    S_NOP 0, implicit %4
-...
-
----
-name: and_i1_s_trunc_vcc
-legalized: true
-body:             |
-  bb.0.entry:
-    liveins: $sgpr0, $vgpr0
-    ; CHECK-LABEL: name: and_i1_s_trunc_vcc
-    ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
-    ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
-    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP]]
-    ; CHECK: S_NOP 0, implicit [[AND]](s1)
-    %0:_(s32) = COPY $sgpr0
-    %1:_(s32) = COPY $vgpr0
-    %2:_(s1) = G_TRUNC %0
-    %3:_(s1) = G_ICMP intpred(ne), %0, %1
-    %4:_(s1) = G_AND %2, %3
-    S_NOP 0, implicit %4
-...
-
 ---
 name: and_s64_ss
 legalized: true
@@ -737,3 +593,4 @@ body: |
     %1:_(<2 x s16>) = COPY $vgpr1
     %2:_(<2 x s16>) = G_AND %0, %1
 ...
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir
index aed682fc671e5..96f9e5fcaacd2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir
@@ -80,9 +80,9 @@ body: |
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
     ; CHECK: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP1]](s1)
-    ; CHECK: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY2]], [[COPY3]]
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1)
+    ; CHECK: [[OR:%[0-9]+]]:scc(s1) = G_OR [[COPY2]], [[COPY3]]
     ; CHECK: S_NOP 0, implicit [[OR]](s1)
       %0:_(s32) = COPY $sgpr0
       %1:_(s32) = COPY $sgpr1
@@ -153,9 +153,7 @@ body:             |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
     ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
-    ; CHECK: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY2]], [[COPY3]]
+    ; CHECK: [[OR:%[0-9]+]]:sgpr(s1) = G_OR [[TRUNC]], [[TRUNC1]]
     ; CHECK: S_NOP 0, implicit [[OR]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
@@ -177,9 +175,8 @@ body:             |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
     ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
-    ; CHECK: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY2]], [[COPY3]]
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; CHECK: [[OR:%[0-9]+]]:scc(s1) = G_OR [[TRUNC]], [[COPY2]]
     ; CHECK: S_NOP 0, implicit [[OR]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir
new file mode 100644
index 0000000000000..d1aa28d445d01
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir
@@ -0,0 +1,1333 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=FAST %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GREEDY %s
+
+---
+name: phi_s1_scc_scc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_scc_scc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $sgpr0, $sgpr1, $sgpr2
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_scc_scc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $sgpr0, $sgpr1, $sgpr2
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0, $sgpr1, $sgpr2
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = COPY $sgpr2
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_ICMP intpred(eq), %0, %3
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_ICMP intpred(eq), %1, %3
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_scc_scc_scc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_scc_scc_scc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; FAST:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; FAST:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.3
+  ; FAST:   G_BR %bb.1
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; FAST:   [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 99
+  ; FAST:   [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 888
+  ; FAST:   [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
+  ; FAST:   [[ICMP3:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]]
+  ; FAST:   G_BRCOND [[ICMP3]](s1), %bb.3
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   successors: %bb.3(0x80000000)
+  ; FAST:   [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123
+  ; FAST:   [[ICMP4:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]]
+  ; FAST:   G_BR %bb.3
+  ; FAST: bb.3:
+  ; FAST:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1, [[ICMP4]](s1), %bb.2
+  ; FAST:   [[COPY4:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY]], [[COPY1]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_scc_scc_scc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; GREEDY:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; GREEDY:   [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.3
+  ; GREEDY:   G_BR %bb.1
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GREEDY:   [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 99
+  ; GREEDY:   [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 888
+  ; GREEDY:   [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]]
+  ; GREEDY:   [[ICMP3:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]]
+  ; GREEDY:   G_BRCOND [[ICMP3]](s1), %bb.3
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   successors: %bb.3(0x80000000)
+  ; GREEDY:   [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123
+  ; GREEDY:   [[ICMP4:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]]
+  ; GREEDY:   G_BR %bb.3
+  ; GREEDY: bb.3:
+  ; GREEDY:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1, [[ICMP4]](s1), %bb.2
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY6]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.3
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = COPY $sgpr2
+    %3:_(s32) = COPY $sgpr3
+    %4:_(s32) = G_CONSTANT i32 0
+    %5:_(s1) = G_ICMP intpred(eq), %0, %3
+    %6:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %6, %bb.3
+    G_BR %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.3
+
+    %7:_(s32) = G_CONSTANT i32 99
+    %8:_(s32) = G_CONSTANT i32 888
+    %9:_(s1) = G_ICMP intpred(eq), %1, %7
+    %10:_(s1) = G_ICMP intpred(eq), %1, %8
+    G_BRCOND %10, %bb.3
+    G_BR %bb.2
+
+  bb.2:
+    successors: %bb.3
+
+    %11:_(s32) = G_CONSTANT i32 123
+    %12:_(s1) = G_ICMP intpred(eq), %2, %11
+    G_BR %bb.3
+
+  bb.3:
+    %13:_(s1) = G_PHI %5, %bb.0, %9, %bb.1, %12, %bb.2
+    %14:_(s32) = G_SELECT %13, %0, %1
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %14
+
+...
+
+---
+name: phi_s1_scc_vcc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_scc_vcc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_scc_vcc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $sgpr0, $sgpr1, $vgpr0
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0, $sgpr1, $vgpr0
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $sgpr1
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_ICMP intpred(eq), %0, %3
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_ICMP intpred(eq), %1, %3
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_vcc_scc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_vcc_scc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY1]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_vcc_scc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr0, $sgpr1
+
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = COPY $sgpr1
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_ICMP intpred(eq), %0, %3
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_ICMP intpred(eq), %1, %3
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %1
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_vcc_vcc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_vcc_vcc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $vgpr1, $sgpr0
+  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY3]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_vcc_vcc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $vgpr1, $sgpr0
+  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY3]], [[COPY]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $vgpr1, $sgpr0
+
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $sgpr0
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_ICMP intpred(eq), %0, %3
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_ICMP intpred(eq), %1, %3
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_s_scc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_s_scc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $sgpr0, $sgpr1, $sgpr2
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_s_scc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $sgpr0, $sgpr1, $sgpr2
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0, $sgpr1, $sgpr2
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = COPY $sgpr2
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_TRUNC %0
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_ICMP intpred(eq), %1, %3
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_scc_s_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_scc_s_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $sgpr0, $sgpr1, $sgpr2
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_scc_s_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $sgpr0, $sgpr1, $sgpr2
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0, $sgpr1, $sgpr2
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = COPY $sgpr2
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_ICMP intpred(eq), %0, %3
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_TRUNC %1
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_scc_v_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_scc_v_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_scc_v_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $sgpr2
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_ICMP intpred(eq), %0, %3
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_TRUNC %1
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_v_scc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_v_scc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_v_scc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr0, $sgpr1
+
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = COPY $sgpr1
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_TRUNC %0
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_ICMP intpred(eq), %1, %3
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_vcc_s_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_vcc_s_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_vcc_s_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr0, $sgpr1
+
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = COPY $sgpr1
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_ICMP intpred(eq), %0, %3
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_TRUNC %1
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_s_vcc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_s_vcc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_s_vcc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr0, $sgpr1
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $sgpr1
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_TRUNC %0
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_ICMP intpred(eq), %1, %3
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_vcc_v_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_vcc_v_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $vgpr1, $sgpr0
+  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; FAST:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_vcc_v_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $vgpr1, $sgpr0
+  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP1]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $vgpr1, $sgpr0
+
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $sgpr0
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_ICMP intpred(eq), %0, %3
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_TRUNC %1
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_v_vcc_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_v_vcc_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $vgpr1, $sgpr0
+  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_v_vcc_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $vgpr1, $sgpr0
+  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $vgpr1, $sgpr0
+
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $sgpr0
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_TRUNC %0
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_ICMP intpred(eq), %1, %3
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_v_s_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_v_s_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_v_s_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr0, $sgpr1
+
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $sgpr0
+    %2:_(s32) = COPY $sgpr1
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_TRUNC %0
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_TRUNC %1
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_s_v_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_s_v_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_s_v_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $sgpr0, $sgpr1
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $sgpr0, $sgpr1
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = COPY $sgpr1
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_TRUNC %0
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_TRUNC %1
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_v_v_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_v_v_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $vgpr0, $vgpr1, $sgpr0
+  ; FAST:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; FAST:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_v_v_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $vgpr0, $vgpr1, $sgpr0
+  ; GREEDY:   [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $vgpr0, $vgpr1, $sgpr0
+
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = COPY $vgpr1
+    %2:_(s32) = COPY $sgpr0
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_TRUNC %0
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_TRUNC %1
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
+
+---
+name: phi_s1_s_s_sbranch
+legalized: true
+tracksRegLiveness: true
+
+body: |
+  ; FAST-LABEL: name: phi_s1_s_s_sbranch
+  ; FAST: bb.0:
+  ; FAST:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; FAST:   liveins: $sgpr0, $sgpr1, $sgpr2
+  ; FAST:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; FAST:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; FAST:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; FAST:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; FAST:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; FAST:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; FAST:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.1:
+  ; FAST:   successors: %bb.2(0x80000000)
+  ; FAST:   [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; FAST:   G_BR %bb.2
+  ; FAST: bb.2:
+  ; FAST:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1
+  ; FAST:   [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1)
+  ; FAST:   [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY]]
+  ; FAST:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  ; GREEDY-LABEL: name: phi_s1_s_s_sbranch
+  ; GREEDY: bb.0:
+  ; GREEDY:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GREEDY:   liveins: $sgpr0, $sgpr1, $sgpr2
+  ; GREEDY:   [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+  ; GREEDY:   [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+  ; GREEDY:   [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+  ; GREEDY:   [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+  ; GREEDY:   [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
+  ; GREEDY:   [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]]
+  ; GREEDY:   G_BRCOND [[ICMP]](s1), %bb.1
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.1:
+  ; GREEDY:   successors: %bb.2(0x80000000)
+  ; GREEDY:   [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
+  ; GREEDY:   G_BR %bb.2
+  ; GREEDY: bb.2:
+  ; GREEDY:   [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1
+  ; GREEDY:   [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1)
+  ; GREEDY:   [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+  ; GREEDY:   [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+  ; GREEDY:   [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]]
+  ; GREEDY:   S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32)
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0, $sgpr1, $sgpr2
+
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = COPY $sgpr2
+    %3:_(s32) = G_CONSTANT i32 0
+    %4:_(s1) = G_TRUNC %0
+    %5:_(s1) = G_ICMP intpred(eq), %2, %3
+    G_BRCOND %5, %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:_(s1) = G_TRUNC %1
+    G_BR %bb.2
+
+  bb.2:
+    %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1
+    %8:_(s32) = G_SELECT %7, %3, %0
+    S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir
index c666ca1be04d8..389e42dcd6bc8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir
@@ -80,9 +80,9 @@ body: |
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]]
     ; CHECK: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP1]](s1)
-    ; CHECK: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY2]], [[COPY3]]
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1)
+    ; CHECK: [[XOR:%[0-9]+]]:scc(s1) = G_XOR [[COPY2]], [[COPY3]]
     ; CHECK: S_NOP 0, implicit [[XOR]](s1)
       %0:_(s32) = COPY $sgpr0
       %1:_(s32) = COPY $sgpr1
@@ -153,9 +153,7 @@ body:             |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
     ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1)
-    ; CHECK: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY2]], [[COPY3]]
+    ; CHECK: [[XOR:%[0-9]+]]:sgpr(s1) = G_XOR [[TRUNC]], [[TRUNC1]]
     ; CHECK: S_NOP 0, implicit [[XOR]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
@@ -177,9 +175,8 @@ body:             |
     ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
     ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1)
-    ; CHECK: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY2]], [[COPY3]]
+    ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1)
+    ; CHECK: [[XOR:%[0-9]+]]:scc(s1) = G_XOR [[TRUNC]], [[COPY2]]
     ; CHECK: S_NOP 0, implicit [[XOR]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1

From 39d888c1e42a62e02adfb5fe8da54cafe7e62b0b Mon Sep 17 00:00:00 2001
From: Julian Lettner <jlettner@apple.com>
Date: Mon, 15 Jul 2019 20:22:27 +0000
Subject: [PATCH 158/451] [TSan] Improve handling of stack pointer mangling in
 {set,long}jmp, pt.9

Switch over to computing the xor key in C, instead of assembly for
Linux/AArch64.

llvm-svn: 366126
---
 .../lib/tsan/rtl/tsan_platform_linux.cc       | 27 ++++---------------
 1 file changed, 5 insertions(+), 22 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
index 0f23da0e877fe..db85d547b0588 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
@@ -302,26 +302,8 @@ void InitializePlatform() {
       CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
       reexec = true;
     }
-    // Initialize the guard pointer used in {sig}{set,long}jump.
-    longjmp_xor_key = InitializeGuardPtr();
-    // uptr old_value = longjmp_xor_key;
-    // InitializeLongjmpXorKey();
-    // CHECK_EQ(longjmp_xor_key, old_value);
-    // If the above check fails for you, please contact me (jlettner@apple.com)
-    // and let me know the values of the two differing keys.  Please also set a
-    // breakpoint on `InitializeGuardPtr` and `InitializeLongjmpXorKey` and tell
-    // me the stack pointer (SP) values that go into the XOR operation (where we
-    // derive the key):
-    //
-    //   InitializeLongjmpXorKey:
-    //     uptr sp = (uptr)__builtin_frame_address(0);
-    //
-    //   InitializeGuardPtr (in tsan_rtl_aarch64.S):
-    //       mov  x0, sp
-    //       ...
-    //       eor  x0, x0, x1
-    //
-    // Then feel free to comment out the call to `InitializeLongjmpXorKey`.
+    // Initialize the xor key used in {sig}{set,long}jump.
+    InitializeLongjmpXorKey();
 #endif
     if (reexec)
       ReExec();
@@ -437,9 +419,10 @@ static void InitializeLongjmpXorKey() {
   jmp_buf env;
   REAL(_setjmp)(env);
 
-  // 2. Retrieve mangled/vanilla SP.
+  // 2. Retrieve vanilla/mangled SP.
+  uptr sp;
+  asm("mov  %0, %%sp" : "=r" (sp));
   uptr mangled_sp = ((uptr *)&env)[LONG_JMP_SP_ENV_SLOT];
-  uptr sp = (uptr)__builtin_frame_address(0);
 
   // 3. xor SPs to obtain key.
   longjmp_xor_key = mangled_sp ^ sp;

From 794346460afa212690d0f006023a0e770884f0bb Mon Sep 17 00:00:00 2001
From: Bob Haarman <llvm@inglorion.net>
Date: Mon, 15 Jul 2019 20:51:44 +0000
Subject: [PATCH 159/451] [clang] allow -fthinlto-index= without -x ir

Summary:
Previously, passing -fthinlto-index= to clang required that bitcode
files be explicitly marked by -x ir. This change makes us detect files
with object file extensions as bitcode files when -fthinlto-index= is
present, so that explicitly marking them is no longer necessary.
Explicitly specifying -x ir is still accepted and continues to be part
of the test case to ensure we continue to support it.

Reviewers: tejohnson, rnk, pcc

Subscribers: mehdi_amini, steven_wu, dexonsmith, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64610

llvm-svn: 366127
---
 clang/include/clang/Basic/DiagnosticDriverKinds.td |  2 ++
 clang/lib/Driver/Driver.cpp                        |  6 ++++++
 clang/lib/Driver/ToolChains/Clang.cpp              |  3 +--
 clang/test/Driver/thinlto_backend.c                | 13 ++++++++++---
 4 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index dd86ca49b7a28..eab453ee20ec9 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -159,6 +159,8 @@ def err_drv_cannot_read_config_file : Error<
   "cannot read configuration file '%0'">;
 def err_drv_nested_config_file: Error<
   "option '--config' is not allowed inside configuration file">;
+def err_drv_arg_requires_bitcode_input: Error<
+  "option '%0' requires input to be LLVM bitcode">;
 
 def err_target_unsupported_arch
   : Error<"the target architecture '%0' is not supported by the target '%1'">;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 22f26d90bd7dd..087335562d0ae 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2119,6 +2119,12 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
               Diag(clang::diag::warn_drv_treating_input_as_cxx)
                   << getTypeName(OldTy) << getTypeName(Ty);
           }
+
+          // If running with -fthinlto-index=, extensions that normally identify
+          // native object files actually identify LLVM bitcode files.
+          if (Args.hasArgNoClaim(options::OPT_fthinlto_index_EQ) &&
+              Ty == types::TY_Object)
+            Ty = types::TY_LLVM_BC;
         }
 
         // -ObjC and -ObjC++ override the default language, but only for "source
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 6a83e1a480a5d..6c3074b69e9f9 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3647,8 +3647,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
 
   if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) {
     if (!types::isLLVMIR(Input.getType()))
-      D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
-                                                       << "-x ir";
+      D.Diag(diag::err_drv_arg_requires_bitcode_input) << A->getAsString(Args);
     Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ);
   }
 
diff --git a/clang/test/Driver/thinlto_backend.c b/clang/test/Driver/thinlto_backend.c
index b2b45f57088ab..7a3d6ede7c0da 100644
--- a/clang/test/Driver/thinlto_backend.c
+++ b/clang/test/Driver/thinlto_backend.c
@@ -2,8 +2,14 @@
 // RUN: llvm-lto -thinlto -o %t %t.o
 
 // -fthinlto_index should be passed to cc1
-// RUN: %clang -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -### 2>&1 | FileCheck %s -check-prefix=CHECK-THINLTOBE-ACTION
+// RUN: %clang -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -### \
+// RUN:     2>&1 | FileCheck %s -check-prefix=CHECK-THINLTOBE-ACTION
 // CHECK-THINLTOBE-ACTION: -fthinlto-index=
+// CHECK-THINLTOBE-ACTION-SAME: {{"?-x"? "?ir"?}}
+
+// Check that this also works without -x ir.
+// RUN: %clang -O2 -o %t1.o %t.o -c -fthinlto-index=%t.thinlto.bc -### 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-THINLTOBE-ACTION
 
 // -save-temps should be passed to cc1
 // RUN: %clang -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps -### 2>&1 | FileCheck %s -check-prefix=CHECK-SAVE-TEMPS -check-prefix=CHECK-SAVE-TEMPS-CWD
@@ -15,5 +21,6 @@
 // CHECK-SAVE-TEMPS-NOT: -emit-llvm-bc
 
 // Ensure clang driver gives the expected error for incorrect input type
-// RUN: not %clang -O2 -o %t1.o %s -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-WARNING
-// CHECK-WARNING: error: invalid argument '-fthinlto-index={{.*}}' only allowed with '-x ir'
+// RUN: not %clang -O2 -o %t1.o %s -c -fthinlto-index=%t.thinlto.bc 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-WARNING
+// CHECK-WARNING: error: option '-fthinlto-index={{.*}}' requires input to be LLVM bitcode

From d00d8578016520a4113c6930a2a6053785e66eac Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 20:59:42 +0000
Subject: [PATCH 160/451] TableGen: Add address space to matchers

Currently AMDGPU uses a CodePatPred to check address spaces from the
MachineMemOperand. Introduce a new first class property so that the
existing patterns can be easily modified to use the new generated
predicate, which will also be handled for GlobalISel.

I would prefer these to match against the pointer type of the
instruction, but that would be difficult to get working with
SelectionDAG compatibility. This is much easier for now and will avoid
a painful tablegen rewrite for all the loads and stores.

I'm also not sure if there's a better way to encode multiple address
spaces in the table, rather than putting the number to expect.

llvm-svn: 366128
---
 .../CodeGen/GlobalISel/InstructionSelector.h  | 10 +++
 .../GlobalISel/InstructionSelectorImpl.h      | 39 +++++++++
 .../include/llvm/Target/TargetSelectionDAG.td |  6 ++
 llvm/test/TableGen/address-space-patfrags.td  | 85 +++++++++++++++++++
 llvm/utils/TableGen/CodeGenDAGPatterns.cpp    | 34 +++++++-
 llvm/utils/TableGen/CodeGenDAGPatterns.h      |  2 +
 llvm/utils/TableGen/GlobalISelEmitter.cpp     | 81 +++++++++++++++++-
 7 files changed, 253 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/TableGen/address-space-patfrags.td

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index e4d05a57bd365..e9b93be76754f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -138,6 +138,16 @@ enum {
   /// - MMOIdx - MMO index
   /// - Size - The size in bytes of the memory access
   GIM_CheckMemorySizeEqualTo,
+
+  /// Check the address space of the memory access for the given machine memory
+  /// operand.
+  /// - InsnID - Instruction ID
+  /// - MMOIdx - MMO index
+  /// - NumAddrSpace - Number of valid address spaces
+  /// - AddrSpaceN - An allowed space of the memory access
+  /// - AddrSpaceN+1 ...
+  GIM_CheckMemoryAddressSpace,
+
   /// Check the size of the memory access for the given machine memory operand
   /// against the size of an operand.
   /// - InsnID - Instruction ID
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
index e010180903d0e..e8ee4af0cb0b5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
@@ -370,6 +370,45 @@ bool InstructionSelector::executeMatchTable(
             return false;
       break;
     }
+    case GIM_CheckMemoryAddressSpace: {
+      int64_t InsnID = MatchTable[CurrentIdx++];
+      int64_t MMOIdx = MatchTable[CurrentIdx++];
+      // This accepts a list of possible address spaces.
+      const int NumAddrSpace = MatchTable[CurrentIdx++];
+
+      if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) {
+        if (handleReject() == RejectAndGiveUp)
+          return false;
+        break;
+      }
+
+      // Need to still jump to the end of the list of address spaces if we find
+      // a match earlier.
+      const uint64_t LastIdx = CurrentIdx + NumAddrSpace;
+
+      const MachineMemOperand *MMO
+        = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+      const unsigned MMOAddrSpace = MMO->getAddrSpace();
+
+      bool Success = false;
+      for (int I = 0; I != NumAddrSpace; ++I) {
+        unsigned AddrSpace = MatchTable[CurrentIdx++];
+        DEBUG_WITH_TYPE(
+          TgtInstructionSelector::getName(),
+          dbgs() << "addrspace(" << MMOAddrSpace << ") vs "
+                 << AddrSpace << '\n');
+
+        if (AddrSpace == MMOAddrSpace) {
+          Success = true;
+          break;
+        }
+      }
+
+      CurrentIdx = LastIdx;
+      if (!Success && handleReject() == RejectAndGiveUp)
+        return false;
+      break;
+    }
     case GIM_CheckMemorySizeEqualTo: {
       int64_t InsnID = MatchTable[CurrentIdx++];
       int64_t MMOIdx = MatchTable[CurrentIdx++];
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 3b5c767fc8727..b913a054ac2cb 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -737,6 +737,10 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
   // cast<StoreSDNode>(N)->isTruncatingStore();
   bit IsTruncStore = ?;
 
+  // cast<MemSDNode>(N)->getAddressSpace() ==
+  // If this empty, accept any address space.
+  list<int> AddressSpaces = ?;
+
   // cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Monotonic
   bit IsAtomicOrderingMonotonic = ?;
   // cast<AtomicSDNode>(N)->getOrdering() == AtomicOrdering::Acquire
@@ -762,6 +766,8 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
   // cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>;
   // cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>;
   ValueType ScalarMemoryVT = ?;
+
+  // TODO: Add alignment
 }
 
 // PatFrag - A version of PatFrags matching only a single fragment.
diff --git a/llvm/test/TableGen/address-space-patfrags.td b/llvm/test/TableGen/address-space-patfrags.td
new file mode 100644
index 0000000000000..f6c5d11449803
--- /dev/null
+++ b/llvm/test/TableGen/address-space-patfrags.td
@@ -0,0 +1,85 @@
+// RUN: llvm-tblgen -gen-dag-isel -I %p/../../include %s 2>&1 | FileCheck -check-prefix=SDAG %s
+// RUN: llvm-tblgen -gen-global-isel -optimize-match-table=false -I %p/../../include %s -o - < %s | FileCheck -check-prefix=GISEL %s
+
+include "llvm/Target/Target.td"
+
+def TestTargetInstrInfo : InstrInfo;
+
+
+def TestTarget : Target {
+  let InstructionSet = TestTargetInstrInfo;
+}
+
+def R0 : Register<"r0"> { let Namespace = "MyTarget"; }
+def GPR32 : RegisterClass<"MyTarget", [i32], 32, (add R0)>;
+
+
+// With one address space
+def pat_frag_a : PatFrag <(ops node:$ptr), (load node:$ptr), [{}]> {
+  let AddressSpaces = [ 999 ];
+  let IsLoad = 1; // FIXME: Can this be inferred?
+  let MemoryVT = i32;
+}
+
+// With multiple address spaces
+def pat_frag_b : PatFrag <(ops node:$ptr), (load node:$ptr), [{}]> {
+  let AddressSpaces = [ 123, 455 ];
+  let IsLoad = 1; // FIXME: Can this be inferred?
+  let MemoryVT = i32;
+}
+
+def inst_a : Instruction {
+  let OutOperandList = (outs GPR32:$dst);
+  let InOperandList = (ins GPR32:$src);
+}
+
+def inst_b : Instruction {
+  let OutOperandList = (outs GPR32:$dst);
+  let InOperandList = (ins GPR32:$src);
+}
+
+// SDAG: case 2: {
+// SDAG: // Predicate_pat_frag_a
+// SDAG-NEXT: SDNode *N = Node;
+// SDAG-NEXT: (void)N;
+// SDAG-NEXT: unsigned AddrSpace = cast<MemSDNode>(N)->getAddressSpace();
+
+// SDAG-NEXT: if (AddrSpace != 999)
+// SDAG-NEXT: return false;
+// SDAG-NEXT: if (cast<MemSDNode>(N)->getMemoryVT() != MVT::i32) return false;
+// SDAG-NEXT: return true;
+
+// GISEL: GIM_Try, /*On fail goto*//*Label 0*/ 47, // Rule ID 0 //
+// GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2,
+// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_LOAD,
+// GISEL-NEXT: GIM_CheckMemorySizeEqualToLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0,
+// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/1, /*AddrSpace*/999,
+// GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/4,
+// GISEL-NEXT: GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::NotAtomic,
+def : Pat <
+  (pat_frag_a GPR32:$src),
+  (inst_a GPR32:$src)
+>;
+
+// SDAG: case 3: {
+// SDAG-NEXT: // Predicate_pat_frag_b
+// SDAG-NEXT: SDNode *N = Node;
+// SDAG-NEXT: (void)N;
+// SDAG-NEXT: unsigned AddrSpace = cast<MemSDNode>(N)->getAddressSpace();
+// SDAG-NEXT: if (AddrSpace != 123 && AddrSpace != 455)
+// SDAG-NEXT: return false;
+// SDAG-NEXT: if (cast<MemSDNode>(N)->getMemoryVT() != MVT::i32) return false;
+// SDAG-NEXT: return true;
+
+
+// GISEL: GIM_Try, /*On fail goto*//*Label 1*/ 95, // Rule ID 1 //
+// GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2,
+// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_LOAD,
+// GISEL-NEXT: GIM_CheckMemorySizeEqualToLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0,
+// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/2, /*AddrSpace*/123, /*AddrSpace*/455,
+// GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/4,
+// GISEL-NEXT: GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::NotAtomic,
+def : Pat <
+  (pat_frag_b GPR32:$src),
+  (inst_b GPR32:$src)
+>;
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 9f87b3d591dc7..a0e8696001b0e 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -954,13 +954,33 @@ std::string TreePredicateFn::getPredCode() const {
   }
 
   if (isLoad() || isStore() || isAtomic()) {
-    StringRef SDNodeName =
-        isLoad() ? "LoadSDNode" : isStore() ? "StoreSDNode" : "AtomicSDNode";
+    if (ListInit *AddressSpaces = getAddressSpaces()) {
+      Code += "unsigned AddrSpace = cast<MemSDNode>(N)->getAddressSpace();\n"
+        " if (";
+
+      bool First = true;
+      for (Init *Val : AddressSpaces->getValues()) {
+        if (First)
+          First = false;
+        else
+          Code += " && ";
+
+        IntInit *IntVal = dyn_cast<IntInit>(Val);
+        if (!IntVal) {
+          PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(),
+                          "AddressSpaces element must be integer");
+        }
+
+        Code += "AddrSpace != " + utostr(IntVal->getValue());
+      }
+
+      Code += ")\nreturn false;\n";
+    }
 
     Record *MemoryVT = getMemoryVT();
 
     if (MemoryVT)
-      Code += ("if (cast<" + SDNodeName + ">(N)->getMemoryVT() != MVT::" +
+      Code += ("if (cast<MemSDNode>(N)->getMemoryVT() != MVT::" +
                MemoryVT->getName() + ") return false;\n")
                   .str();
   }
@@ -1149,6 +1169,14 @@ Record *TreePredicateFn::getMemoryVT() const {
     return nullptr;
   return R->getValueAsDef("MemoryVT");
 }
+
+ListInit *TreePredicateFn::getAddressSpaces() const {
+  Record *R = getOrigPatFragRecord()->getRecord();
+  if (R->isValueUnset("AddressSpaces"))
+    return nullptr;
+  return R->getValueAsListInit("AddressSpaces");
+}
+
 Record *TreePredicateFn::getScalarMemoryVT() const {
   Record *R = getOrigPatFragRecord()->getRecord();
   if (R->isValueUnset("ScalarMemoryVT"))
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h
index bf36ebf26ccf3..2b49a64c3f1d6 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -593,6 +593,8 @@ class TreePredicateFn {
   /// ValueType record for the memory VT.
   Record *getScalarMemoryVT() const;
 
+  ListInit *getAddressSpaces() const;
+
   // If true, indicates that GlobalISel-based C++ code was supplied.
   bool hasGISelPredicateCode() const;
   std::string getGISelPredicateCode() const;
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 4ec7a81927836..4940d911f6637 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -232,6 +232,23 @@ static std::string explainPredicates(const TreePatternNode *N) {
     if (Record *VT = P.getScalarMemoryVT())
       Explanation += (" ScalarVT(MemVT)=" + VT->getName()).str();
 
+    if (ListInit *AddrSpaces = P.getAddressSpaces()) {
+      raw_string_ostream OS(Explanation);
+      OS << " AddressSpaces=[";
+
+      StringRef AddrSpaceSeparator;
+      for (Init *Val : AddrSpaces->getValues()) {
+        IntInit *IntVal = dyn_cast<IntInit>(Val);
+        if (!IntVal)
+          continue;
+
+        OS << AddrSpaceSeparator << IntVal->getValue();
+        AddrSpaceSeparator = ", ";
+      }
+
+      OS << ']';
+    }
+
     if (P.isAtomicOrderingMonotonic())
       Explanation += " monotonic";
     if (P.isAtomicOrderingAcquire())
@@ -308,6 +325,12 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
         continue;
     }
 
+    if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
+      const ListInit *AddrSpaces = Predicate.getAddressSpaces();
+      if (AddrSpaces && !AddrSpaces->empty())
+        continue;
+    }
+
     if (Predicate.isAtomic() && Predicate.getMemoryVT())
       continue;
 
@@ -1028,6 +1051,7 @@ class PredicateMatcher {
     IPM_AtomicOrderingMMO,
     IPM_MemoryLLTSize,
     IPM_MemoryVsLLTSize,
+    IPM_MemoryAddressSpace,
     IPM_GenericPredicate,
     OPM_SameOperand,
     OPM_ComplexPattern,
@@ -1789,6 +1813,42 @@ class MemorySizePredicateMatcher : public InstructionPredicateMatcher {
   }
 };
 
+class MemoryAddressSpacePredicateMatcher : public InstructionPredicateMatcher {
+protected:
+  unsigned MMOIdx;
+  SmallVector<unsigned, 4> AddrSpaces;
+
+public:
+  MemoryAddressSpacePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
+                                     ArrayRef<unsigned> AddrSpaces)
+      : InstructionPredicateMatcher(IPM_MemoryAddressSpace, InsnVarID),
+        MMOIdx(MMOIdx), AddrSpaces(AddrSpaces.begin(), AddrSpaces.end()) {}
+
+  static bool classof(const PredicateMatcher *P) {
+    return P->getKind() == IPM_MemoryAddressSpace;
+  }
+  bool isIdentical(const PredicateMatcher &B) const override {
+    if (!InstructionPredicateMatcher::isIdentical(B))
+      return false;
+    auto *Other = cast<MemoryAddressSpacePredicateMatcher>(&B);
+    return MMOIdx == Other->MMOIdx && AddrSpaces == Other->AddrSpaces;
+  }
+
+  void emitPredicateOpcodes(MatchTable &Table,
+                            RuleMatcher &Rule) const override {
+    Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace")
+          << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+          << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+        // Encode number of address spaces to expect.
+          << MatchTable::Comment("NumAddrSpace")
+          << MatchTable::IntValue(AddrSpaces.size());
+    for (unsigned AS : AddrSpaces)
+      Table << MatchTable::Comment("AddrSpace") << MatchTable::IntValue(AS);
+
+    Table << MatchTable::LineBreak;
+  }
+};
+
 /// Generates code to check that the size of an MMO is less-than, equal-to, or
 /// greater than a given LLT.
 class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher {
@@ -3210,7 +3270,26 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       continue;
     }
 
-    // G_LOAD is used for both non-extending and any-extending loads. 
+    // An address space check is needed in all contexts if there is one.
+    if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) {
+      if (const ListInit *AddrSpaces = Predicate.getAddressSpaces()) {
+        SmallVector<unsigned, 4> ParsedAddrSpaces;
+
+        for (Init *Val : AddrSpaces->getValues()) {
+          IntInit *IntVal = dyn_cast<IntInit>(Val);
+          if (!IntVal)
+            return failedImport("Address space is not an integer");
+          ParsedAddrSpaces.push_back(IntVal->getValue());
+        }
+
+        if (!ParsedAddrSpaces.empty()) {
+          InsnMatcher.addPredicate<MemoryAddressSpacePredicateMatcher>(
+            0, ParsedAddrSpaces);
+        }
+      }
+    }
+
+    // G_LOAD is used for both non-extending and any-extending loads.
     if (Predicate.isLoad() && Predicate.isNonExtLoad()) {
       InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
           0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0);

From 02772499551ada7e72fa83515e563e770020ba55 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 21:15:20 +0000
Subject: [PATCH 161/451] TableGen/GlobalISel: Fix handling of truncstore
 patterns

This was failing to import the AMDGPU truncstore patterns. The
truncating stores from 32-bit to 8/16 were then somehow being
incorrectly selected to a 4-byte store.

A separate check is emitted for the LLT size in comparison to the
specific memory VT, which looks strange to me but makes sense based on
the hierarchy of PatFrags used for the default truncstore PatFrags.

llvm-svn: 366129
---
 llvm/test/TableGen/address-space-patfrags.td | 38 ++++++++++++++++++++
 llvm/utils/TableGen/GlobalISelEmitter.cpp    |  9 ++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/llvm/test/TableGen/address-space-patfrags.td b/llvm/test/TableGen/address-space-patfrags.td
index f6c5d11449803..029170e8414fa 100644
--- a/llvm/test/TableGen/address-space-patfrags.td
+++ b/llvm/test/TableGen/address-space-patfrags.td
@@ -38,6 +38,11 @@ def inst_b : Instruction {
   let InOperandList = (ins GPR32:$src);
 }
 
+def inst_c : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins GPR32:$src0, GPR32:$src1);
+}
+
 // SDAG: case 2: {
 // SDAG: // Predicate_pat_frag_a
 // SDAG-NEXT: SDNode *N = Node;
@@ -83,3 +88,36 @@ def : Pat <
   (pat_frag_b GPR32:$src),
   (inst_b GPR32:$src)
 >;
+
+
+def truncstorei16_addrspace : PatFrag<(ops node:$val, node:$ptr),
+                                (truncstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let MemoryVT = i16;
+  let AddressSpaces = [ 123, 455 ];
+}
+
+// Test truncstore without a specific MemoryVT
+// GISEL: GIM_Try, /*On fail goto*//*Label 2*/ 133, // Rule ID 2 //
+// GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2,
+// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_STORE,
+// GISEL-NEXT: GIM_CheckMemorySizeLessThanLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0,
+// GISEL-NEXT: GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::NotAtomic,
+// GISEL-NEXT: // MIs[0] src0
+// GISEL-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32,
+def : Pat <
+  (truncstore GPR32:$src0, GPR32:$src1),
+  (inst_c GPR32:$src0, GPR32:$src1)
+>;
+
+// Test truncstore with specific MemoryVT
+// GISEL: GIM_Try, /*On fail goto*//*Label 3*/ 181, // Rule ID 3 //
+// GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2,
+// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_STORE,
+// GISEL-NEXT: GIM_CheckMemorySizeLessThanLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0,
+// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/2, /*AddrSpace*/123, /*AddrSpace*/455,
+// GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/2,
+def : Pat <
+  (truncstorei16_addrspace GPR32:$src0, GPR32:$src1),
+  (inst_c GPR32:$src0, GPR32:$src1)
+>;
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 4940d911f6637..f1c02134198bd 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -314,7 +314,7 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) {
         Predicate.isSignExtLoad() || Predicate.isZeroExtLoad())
       continue;
 
-    if (Predicate.isNonTruncStore())
+    if (Predicate.isNonTruncStore() || Predicate.isTruncStore())
       continue;
 
     if (Predicate.isLoad() && Predicate.getMemoryVT())
@@ -3301,6 +3301,13 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
       continue;
     }
 
+    if (Predicate.isStore() && Predicate.isTruncStore()) {
+      // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size.
+      InsnMatcher.addPredicate<MemoryVsLLTSizePredicateMatcher>(
+        0, MemoryVsLLTSizePredicateMatcher::LessThan, 0);
+      continue;
+    }
+
     // No check required. We already did it by swapping the opcode.
     if (!SrcGIEquivOrNull->isValueUnset("IfSignExtend") &&
         Predicate.isSignExtLoad())

From c4f245b40aad7e8627b37a8bf1bdcdbcd541e665 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 15 Jul 2019 21:16:29 +0000
Subject: [PATCH 162/451] [LoopUnroll+LoopUnswitch] do not transform loops
 containing callbr

Summary:
There is currently a correctness issue when unrolling loops containing
callbr's where their indirect targets are being updated correctly to the
newly created labels, but their operands are not.  This manifests in
unrolled loops where the second and subsequent copies of callbr
instructions have blockaddresses of the label from the first instance of
the unrolled loop, which would result in nonsensical runtime control
flow.

For now, conservatively do not unroll the loop.  In the future, I think
we can pursue unrolling such loops provided we transform the cloned
callbr's operands correctly.

Such a transform and its legalities are being discussed in:
https://reviews.llvm.org/D64101

Link: https://bugs.llvm.org/show_bug.cgi?id=42489
Link: https://groups.google.com/forum/#!topic/clang-built-linux/z-hRWP9KqPI

Reviewers: fhahn, hfinkel, efriedma

Reviewed By: fhahn, hfinkel, efriedma

Subscribers: efriedma, hiraditya, zzheng, dmgreen, llvm-commits, pirama, kees, nathanchance, E5ten, craig.topper, chandlerc, glider, void, srhines

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64368

llvm-svn: 366130
---
 llvm/lib/Analysis/LoopInfo.cpp              |  5 +-
 llvm/test/Transforms/LoopUnroll/callbr.ll   | 51 ++++++++++++++++
 llvm/test/Transforms/LoopUnswitch/callbr.ll | 66 +++++++++++++++++++++
 3 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/callbr.ll
 create mode 100644 llvm/test/Transforms/LoopUnswitch/callbr.ll

diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 00dbe30c2b3db..c59e48a7a98e1 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -432,8 +432,11 @@ bool Loop::isLoopSimplifyForm() const {
 bool Loop::isSafeToClone() const {
   // Return false if any loop blocks contain indirectbrs, or there are any calls
   // to noduplicate functions.
+  // FIXME: it should be ok to clone CallBrInst's if we correctly update the
+  // operand list to reflect the newly cloned labels.
   for (BasicBlock *BB : this->blocks()) {
-    if (isa<IndirectBrInst>(BB->getTerminator()))
+    if (isa<IndirectBrInst>(BB->getTerminator()) ||
+        isa<CallBrInst>(BB->getTerminator()))
       return false;
 
     for (Instruction &I : *BB)
diff --git a/llvm/test/Transforms/LoopUnroll/callbr.ll b/llvm/test/Transforms/LoopUnroll/callbr.ll
new file mode 100644
index 0000000000000..22206b46662e3
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/callbr.ll
@@ -0,0 +1,51 @@
+; RUN: opt -loop-unroll -S %s | FileCheck %s
+
+; Check that the loop body exists.
+; CHECK: for.body
+; CHECK: if.then
+; CHECK: asm.fallthrough
+; CHECK: l_yes
+; CHECK: for.inc
+
+; Check that the loop body does not get unrolled.  We could modify this test in
+; the future to support loop unrolling callbr's IFF we checked that the callbr
+; operands were unrolled/updated correctly, as today they are not.
+; CHECK-NOT: if.then.1
+; CHECK-NOT: asm.fallthrough.1
+; CHECK-NOT: l_yes.1
+; CHECK-NOT: for.inc.1
+; CHECK-NOT: if.then.2
+; CHECK-NOT: asm.fallthrough.2
+; CHECK-NOT: l_yes.2
+; CHECK-NOT: for.inc.2
+
+define dso_local void @d() {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc
+  ret void
+
+for.body:                                         ; preds = %for.inc, %entry
+  %e.04 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %tobool = icmp eq i32 %e.04, 0
+  br i1 %tobool, label %for.inc, label %if.then
+
+if.then:                                          ; preds = %for.body
+  callbr void asm sideeffect "1: nop\0A\09.quad b, ${0:l}, $$5\0A\09", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@d, %l_yes))
+          to label %asm.fallthrough [label %l_yes]
+
+asm.fallthrough:                                  ; preds = %if.then
+  br label %l_yes
+
+l_yes:                                            ; preds = %asm.fallthrough, %if.then
+  %call = tail call i32 (...) @g()
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %l_yes
+  %inc = add nuw nsw i32 %e.04, 1
+  %exitcond = icmp eq i32 %inc, 3
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+declare dso_local i32 @g(...) local_unnamed_addr
diff --git a/llvm/test/Transforms/LoopUnswitch/callbr.ll b/llvm/test/Transforms/LoopUnswitch/callbr.ll
new file mode 100644
index 0000000000000..6e05374d3299f
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnswitch/callbr.ll
@@ -0,0 +1,66 @@
+; RUN: opt -loop-unswitch %s -S | FileCheck %s
+
+; We want to check that the loop does not get split (so only 2 callbr's not 4).
+; It's ok to modify this test in the future should we allow the loop containing
+; callbr to be unswitched and are able to do so correctly.
+
+; CHECK: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10))
+; CHECK: to label %7 [label %10]
+; CHECK: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10))
+; CHECK: to label %9 [label %10]
+
+; CHECK-NOT: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10))
+; CHECK-NOT: to label %7 [label %10]
+; CHECK-NOT: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10))
+; CHECK-NOT: to label %9 [label %10]
+; CHECK-NOT: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %19))
+; CHECK-NOT: to label %16 [label %19]
+; CHECK-NOT: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %19))
+; CHECK-NOT: to label %18 [label %19]
+
+; This test is essentially:
+; void foo(int n) {
+;   for (int i = 0; i < 1000; ++i)
+;     if (n) {
+;       asm goto("# %l0"::::bar);
+;       bar:;
+;     } else {
+;       asm goto("# %l0"::::baz);
+;       baz:;
+;     }
+;}
+
+define dso_local void @foo(i32) #0 {
+  br label %2
+
+2:                                                ; preds = %10, %1
+  %.0 = phi i32 [ 0, %1 ], [ %11, %10 ]
+  %3 = icmp ult i32 %.0, 1000
+  br i1 %3, label %4, label %12
+
+4:                                                ; preds = %2
+  %5 = icmp eq i32 %0, 0
+  br i1 %5, label %8, label %6
+
+6:                                                ; preds = %4
+  callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10)) #0
+    to label %7 [label %10]
+
+7:                                                ; preds = %6
+  br label %10
+
+8:                                                ; preds = %4
+  callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10)) #0
+    to label %9 [label %10]
+
+9:                                                ; preds = %8
+  br label %10
+
+10:                                               ; preds = %7, %6, %9, %8
+  %11 = add nuw nsw i32 %.0, 1
+  br label %2
+
+12:                                               ; preds = %2
+  ret void
+}
+

From 228f80d92dc22f3a6cd0b9b98566327f63debbe5 Mon Sep 17 00:00:00 2001
From: Julian Lettner <jlettner@apple.com>
Date: Mon, 15 Jul 2019 21:22:57 +0000
Subject: [PATCH 163/451] [TSan] Fix asm token error

llvm-svn: 366131
---
 compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
index db85d547b0588..8e4ddc969e058 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
@@ -421,7 +421,7 @@ static void InitializeLongjmpXorKey() {
 
   // 2. Retrieve vanilla/mangled SP.
   uptr sp;
-  asm("mov  %0, %%sp" : "=r" (sp));
+  asm("mov  %0, %sp" : "=r" (sp));
   uptr mangled_sp = ((uptr *)&env)[LONG_JMP_SP_ENV_SLOT];
 
   // 3. xor SPs to obtain key.

From b082f1055b0a5370d1902339ffe058b4abb6abc0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Mon, 15 Jul 2019 21:41:44 +0000
Subject: [PATCH 164/451] AMDGPU: Use standalone MUBUF load patterns

We already do this for the flat and DS instructions, although it is
certainly uglier and more verbose.

This will allow using separate pattern definitions for extload and
zextload. Currently we get away with using a single PatFrag with
custom predicate code to check if the extension type is a zextload or
anyextload. The generic mechanism the global isel emitter understands
treats these as mutually exclusive. I was considering making the
pattern emitter accept zextload or sextload extensions for anyextload
patterns, but in global isel, the different extending loads have
distinct opcodes, and there is currently no mechanism for an opcode
matcher to try multiple (and there probably is very little need for
one beyond this case).

llvm-svn: 366132
---
 llvm/lib/Target/AMDGPU/BUFInstructions.td | 57 +++++++++++++++--------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index bc70d138e4280..7d9ca59c6d08a 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -470,6 +470,24 @@ class MUBUF_Load_Pseudo <string opName,
   let dwords = getMUBUFDwords<vdataClass>.ret;
 }
 
+class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
+  (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+  (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
+>;
+
+class MUBUF_Addr64_Load_Pat <Instruction inst,
+                            ValueType load_vt = i32,
+                            SDPatternOperator ld = null_frag> : Pat <
+  (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+  (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
+>;
+
+multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
+  def : MUBUF_Offset_Load_Pat<!cast<Instruction>(BaseInst#"_OFFSET"), load_vt, ld>;
+  def : MUBUF_Addr64_Load_Pat<!cast<Instruction>(BaseInst#"_ADDR64"), load_vt, ld>;
+}
+
+
 // FIXME: tfe can't be an operand because it requires a separate
 // opcode because it needs an N+1 register class dest register.
 multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
@@ -478,20 +496,10 @@ multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
                               bit TiedDest = 0,
                               bit isLds = 0> {
 
-  def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
-    TiedDest, isLds,
-    !if(isLds,
-        [],
-        [(set load_vt:$vdata,
-         (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
+  def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, TiedDest, isLds>,
     MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
 
-  def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
-    TiedDest, isLds,
-    !if(isLds,
-        [],
-        [(set load_vt:$vdata,
-         (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
+  def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, TiedDest, isLds>,
     MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
 
   def _OFFEN  : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>;
@@ -819,30 +827,39 @@ let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
 } // End HasPackedD16VMem.
 
 defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
+  "buffer_load_ubyte", VGPR_32, i32
 >;
 defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
+  "buffer_load_sbyte", VGPR_32, i32
 >;
 defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
+  "buffer_load_ushort", VGPR_32, i32
 >;
 defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
+  "buffer_load_sshort", VGPR_32, i32
 >;
 defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds <
-  "buffer_load_dword", VGPR_32, i32, load_global
+  "buffer_load_dword", VGPR_32, i32
 >;
 defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads <
-  "buffer_load_dwordx2", VReg_64, v2i32, load_global
+  "buffer_load_dwordx2", VReg_64, v2i32
 >;
 defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads <
-  "buffer_load_dwordx3", VReg_96, v3i32, load_global
+  "buffer_load_dwordx3", VReg_96, v3i32
 >;
 defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
-  "buffer_load_dwordx4", VReg_128, v4i32, load_global
+  "buffer_load_dwordx4", VReg_128, v4i32
 >;
 
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, az_extloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, az_extloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
+
 // This is not described in AMD documentation,
 // but 'lds' versions of these opcodes are available
 // in at least GFX8+ chips. See Bug 37653.

From becae2b2324f15bbe0de46d58e7eb2fe5bd1cef4 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Mon, 15 Jul 2019 21:46:38 +0000
Subject: [PATCH 165/451] [mips] Print BEQZL and BNEZL pseudo instructions

One of the reasons - to be compatible with GNU tools.

llvm-svn: 366133
---
 llvm/lib/Target/Mips/MipsInstrInfo.td     |  8 +++---
 llvm/test/MC/Mips/branch-pseudos.s        | 32 +++++++++++------------
 llvm/test/MC/Mips/macro-bcc-imm.s         | 16 ++++++------
 llvm/test/MC/Mips/mips-jump-delay-slots.s |  4 +--
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td
index ab353c92e27db..a4e85a38ab28d 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -2801,14 +2801,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
   def : MipsInstAlias<"bnez $rs,$offset",
                       (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
         ISA_MIPS1;
-  def : MipsInstAlias<"bnezl $rs,$offset",
-                      (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
+  def : MipsInstAlias<"bnezl $rs, $offset",
+                      (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 1>,
         ISA_MIPS2;
   def : MipsInstAlias<"beqz $rs,$offset",
                       (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
         ISA_MIPS1;
-  def : MipsInstAlias<"beqzl $rs,$offset",
-                      (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>,
+  def : MipsInstAlias<"beqzl $rs, $offset",
+                      (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 1>,
         ISA_MIPS2;
 
   def : MipsInstAlias<"syscall", (SYSCALL 0), 1>, ISA_MIPS1;
diff --git a/llvm/test/MC/Mips/branch-pseudos.s b/llvm/test/MC/Mips/branch-pseudos.s
index 56841e29f4259..9c4abdbbfad01 100644
--- a/llvm/test/MC/Mips/branch-pseudos.s
+++ b/llvm/test/MC/Mips/branch-pseudos.s
@@ -190,12 +190,12 @@ local_label:
 
   bltl $7,$8,local_label
 # CHECK: slt $1, $7, $8                 # encoding: [0x00,0xe8,0x08,0x2a]
-# CHECK: bnel $1, $zero, local_label    # encoding: [0x54,0x20,A,A]
+# CHECK: bnezl $1, local_label          # encoding: [0x54,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bltl $7,$8,global_label
 # CHECK: slt $1, $7, $8                 # encoding: [0x00,0xe8,0x08,0x2a]
-# CHECK: bnel $1, $zero, global_label   # encoding: [0x54,0x20,A,A]
+# CHECK: bnezl $1, global_label         # encoding: [0x54,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bltl $7,$0,local_label
@@ -211,12 +211,12 @@ local_label:
 
   blel $7,$8,local_label
 # CHECK: slt $1, $8, $7                 # encoding: [0x01,0x07,0x08,0x2a]
-# CHECK: beql $1, $zero, local_label    # encoding: [0x50,0x20,A,A]
+# CHECK: beqzl $1, local_label          # encoding: [0x50,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   blel $7,$8,global_label
 # CHECK: slt $1, $8, $7                 # encoding: [0x01,0x07,0x08,0x2a]
-# CHECK: beql $1, $zero, global_label   # encoding: [0x50,0x20,A,A]
+# CHECK: beqzl $1, global_label         # encoding: [0x50,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   blel $7,$0,local_label
@@ -235,12 +235,12 @@ local_label:
 
   bgel $7,$8,local_label
 # CHECK: slt $1, $7, $8                 # encoding: [0x00,0xe8,0x08,0x2a]
-# CHECK: beql $1, $zero, local_label    # encoding: [0x50,0x20,A,A]
+# CHECK: beqzl $1, local_label          # encoding: [0x50,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bgel $7,$8,global_label
 # CHECK: slt $1, $7, $8                 # encoding: [0x00,0xe8,0x08,0x2a]
-# CHECK: beql $1, $zero, global_label   # encoding: [0x50,0x20,A,A]
+# CHECK: beqzl $1, global_label         # encoding: [0x50,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bgel $7,$0,local_label
@@ -259,12 +259,12 @@ local_label:
 
   bgtl $7,$8,local_label
 # CHECK: slt $1, $8, $7                 # encoding: [0x01,0x07,0x08,0x2a]
-# CHECK: bnel $1, $zero, local_label    # encoding: [0x54,0x20,A,A]
+# CHECK: bnezl $1, local_label          # encoding: [0x54,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bgtl $7,$8,global_label
 # CHECK: slt $1, $8, $7                 # encoding: [0x01,0x07,0x08,0x2a]
-# CHECK: bnel $1, $zero, global_label   # encoding: [0x54,0x20,A,A]
+# CHECK: bnezl $1, global_label         # encoding: [0x54,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bgtl $7,$0,local_label
@@ -280,12 +280,12 @@ local_label:
 
   bltul $7,$8,local_label
 # CHECK: sltu $1, $7, $8                # encoding: [0x00,0xe8,0x08,0x2b]
-# CHECK: bnel $1, $zero, local_label    # encoding: [0x54,0x20,A,A]
+# CHECK: bnezl $1, local_label          # encoding: [0x54,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bltul $7,$8,global_label
 # CHECK: sltu $1, $7, $8                # encoding: [0x00,0xe8,0x08,0x2b]
-# CHECK: bnel $1, $zero, global_label   # encoding: [0x54,0x20,A,A]
+# CHECK: bnezl $1, global_label         # encoding: [0x54,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bltul $7,$0,local_label
@@ -301,12 +301,12 @@ local_label:
 
   bleul $7,$8,local_label
 # CHECK: sltu $1, $8, $7                # encoding: [0x01,0x07,0x08,0x2b]
-# CHECK: beql $1, $zero, local_label    # encoding: [0x50,0x20,A,A]
+# CHECK: beqzl $1, local_label          # encoding: [0x50,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bleul $7,$8,global_label
 # CHECK: sltu $1, $8, $7                # encoding: [0x01,0x07,0x08,0x2b]
-# CHECK: beql $1, $zero, global_label   # encoding: [0x50,0x20,A,A]
+# CHECK: beqzl $1, global_label         # encoding: [0x50,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bleul $7,$0,local_label
@@ -325,12 +325,12 @@ local_label:
 
   bgeul $7,$8,local_label
 # CHECK: sltu $1, $7, $8                # encoding: [0x00,0xe8,0x08,0x2b]
-# CHECK: beql $1, $zero, local_label    # encoding: [0x50,0x20,A,A]
+# CHECK: beqzl $1, local_label          # encoding: [0x50,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bgeul $7,$8,global_label
 # CHECK: sltu $1, $7, $8                # encoding: [0x00,0xe8,0x08,0x2b]
-# CHECK: beql $1, $zero, global_label   # encoding: [0x50,0x20,A,A]
+# CHECK: beqzl $1, global_label         # encoding: [0x50,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bgeul $7,$0,local_label
@@ -349,12 +349,12 @@ local_label:
 
   bgtul $7,$8,local_label
 # CHECK: sltu $1, $8, $7                # encoding: [0x01,0x07,0x08,0x2b]
-# CHECK: bnel $1, $zero, local_label    # encoding: [0x54,0x20,A,A]
+# CHECK: bnezl $1, local_label          # encoding: [0x54,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bgtul $7,$8,global_label
 # CHECK: sltu $1, $8, $7                # encoding: [0x01,0x07,0x08,0x2b]
-# CHECK: bnel $1, $zero, global_label   # encoding: [0x54,0x20,A,A]
+# CHECK: bnezl $1, global_label         # encoding: [0x54,0x20,A,A]
 # CHECK:                                #   fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
 # CHECK: nop                            # encoding: [0x00,0x00,0x00,0x00]
   bgtul $7,$0,local_label
diff --git a/llvm/test/MC/Mips/macro-bcc-imm.s b/llvm/test/MC/Mips/macro-bcc-imm.s
index ebc4cd2ce1898..fdee6ec3670e6 100644
--- a/llvm/test/MC/Mips/macro-bcc-imm.s
+++ b/llvm/test/MC/Mips/macro-bcc-imm.s
@@ -75,33 +75,33 @@ foo:
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
     bltl $a2, 16, foo     # ALL: addiu $1, $zero, 16
                           # ALL: slt   $1, $6, $1
-                          # ALL: bnel  $1, $zero, foo
+                          # ALL: bnezl $1, foo
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
     blel $a2, 16, foo     # ALL: addiu $1, $zero, 16
                           # ALL: slt   $1, $1, $6
-                          # ALL: beql  $1, $zero, foo
+                          # ALL: beqzl $1, foo
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
     bgel $a2, 32767, foo  # ALL: addiu $1, $zero, 32767
                           # ALL: slt   $1, $6, $1
-                          # ALL: beql  $1, $zero, foo
+                          # ALL: beqzl $1, foo
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
     bgtl $a2, 32768, foo  # ALL: ori   $1, $zero, 32768
                           # ALL: slt   $1, $1, $6
-                          # ALL: bnel  $1, $zero, foo
+                          # ALL: bnezl $1, foo
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
     bltul $a2, 16, foo    # ALL: addiu $1, $zero, 16
                           # ALL: sltu  $1, $6, $1
-                          # ALL: bnel  $1, $zero, foo
+                          # ALL: bnezl $1, foo
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
     bleul $a2, 16, foo    # ALL: addiu $1, $zero, 16
                           # ALL: sltu  $1, $1, $6
-                          # ALL: beql  $1, $zero, foo
+                          # ALL: beqzl $1, foo
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
     bgeul $a2, 32767, foo # ALL: addiu $1, $zero, 32767
                           # ALL: sltu  $1, $6, $1
-                          # ALL: beql  $1, $zero, foo
+                          # ALL: beqzl $1, foo
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
     bgtul $a2, 65536, foo # ALL: lui   $1, 1
                           # ALL: sltu  $1, $1, $6
-                          # ALL: bnel  $1, $zero, foo
+                          # ALL: bnezl $1, foo
                           # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
diff --git a/llvm/test/MC/Mips/mips-jump-delay-slots.s b/llvm/test/MC/Mips/mips-jump-delay-slots.s
index 8a0781103e9aa..f61f710b4accc 100644
--- a/llvm/test/MC/Mips/mips-jump-delay-slots.s
+++ b/llvm/test/MC/Mips/mips-jump-delay-slots.s
@@ -67,13 +67,13 @@
         # CHECK: beql $9, $6, 1332
         # CHECK: nop
         beql $9,$6,1332
-        # CHECK: beql $9, $zero, 1332
+        # CHECK: beqzl $9, 1332
         # CHECK: nop
         beqzl $9,1332
         # CHECK: bnel $9, $6, 1332
         # CHECK: nop
         bnel $9,$6,1332
-        # CHECK: bnel $9, $zero, 1332
+        # CHECK: bnezl $9, 1332
         # CHECK: nop
         bnezl $9,1332
         # CHECK: bgezl $6, 1332

From 46b84fa231c7c1b55e0271e085779d883d103657 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <adamski.dominik@gmail.com>
Date: Mon, 15 Jul 2019 22:01:55 +0000
Subject: [PATCH 166/451] [ScopInfo][NFC] Add dot at the end of comment
 statement.

llvm-svn: 366134
---
 polly/include/polly/ScopInfo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 00c69cd24e104..aeed4a7e3d4f3 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -51,7 +51,7 @@ extern bool UseInstructionNames;
 
 // The maximal number of basic sets we allow during domain construction to
 // be created. More complex scops will result in very high compile time and
-// are also unlikely to result in good code
+// are also unlikely to result in good code.
 extern int const MaxDisjunctsInDomain;
 
 /// Enumeration of assumptions Polly can take.

From 423b4a18a49cdf61efd0c45a7e133218db5a8547 Mon Sep 17 00:00:00 2001
From: Austin Kerbow <Austin.Kerbow@amd.com>
Date: Mon, 15 Jul 2019 22:07:05 +0000
Subject: [PATCH 167/451] [AMDGPU] Enable merging m0 initializations.

Summary:
Enable hoisting and merging m0 defs that are initialized with the same
immediate value. Fix a bug where removed instructions were not considered
to interfere with other inits, and make sure not to hoist inits before block
prologues.

Reviewers: rampitec, arsenm

Reviewed By: rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64766

llvm-svn: 366135
---
 llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp |  47 ++++++---
 llvm/test/CodeGen/AMDGPU/merge-m0.mir      | 108 +++++++++++++++++++--
 2 files changed, 133 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 18598d6cef450..624953963cf4c 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -103,7 +103,7 @@ using namespace llvm;
 static cl::opt<bool> EnableM0Merge(
   "amdgpu-enable-merge-m0",
   cl::desc("Merge and hoist M0 initializations"),
-  cl::init(false));
+  cl::init(true));
 
 namespace {
 
@@ -452,18 +452,32 @@ static bool isReachable(const MachineInstr *From,
            (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
 }
 
+// Return the first non-PHI, non-prologue instruction in MBB, or end() if none.
+static MachineBasicBlock::iterator
+getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
+  const MachineBasicBlock::iterator BlockEnd = MBB->end();
+  MachineBasicBlock::iterator Pos = MBB->getFirstNonPHI();
+  // Step past every leading instruction the target reports as block prologue.
+  for (; Pos != BlockEnd && TII->isBasicBlockPrologue(*Pos); ++Pos) {}
+  return Pos;
+}
+
 // Hoist and merge identical SGPR initializations into a common predecessor.
 // This is intended to combine M0 initializations, but can work with any
 // SGPR. A VGPR cannot be processed since we cannot guarantee vector
 // executioon.
 static bool hoistAndMergeSGPRInits(unsigned Reg,
                                    const MachineRegisterInfo &MRI,
-                                   MachineDominatorTree &MDT) {
+                                   MachineDominatorTree &MDT,
+                                   const TargetInstrInfo *TII) {
   // List of inits by immediate value.
   using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
   InitListMap Inits;
   // List of clobbering instructions.
   SmallVector<MachineInstr*, 8> Clobbers;
+  // List of instructions marked for deletion.
+  SmallSet<MachineInstr*, 8> MergedInstrs;
+
   bool Changed = false;
 
   for (auto &MI : MRI.def_instructions(Reg)) {
@@ -492,8 +506,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
         MachineInstr *MI2 = *I2;
 
         // Check any possible interference
-        auto intereferes = [&](MachineBasicBlock::iterator From,
-                               MachineBasicBlock::iterator To) -> bool {
+        auto interferes = [&](MachineBasicBlock::iterator From,
+                              MachineBasicBlock::iterator To) -> bool {
 
           assert(MDT.dominates(&*To, &*From));
 
@@ -525,23 +539,23 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
         };
 
         if (MDT.dominates(MI1, MI2)) {
-          if (!intereferes(MI2, MI1)) {
+          if (!interferes(MI2, MI1)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI2->getParent()) << " " << *MI2);
-            MI2->eraseFromParent();
-            Defs.erase(I2++);
+            MergedInstrs.insert(MI2);
             Changed = true;
+            ++I2;
             continue;
           }
         } else if (MDT.dominates(MI2, MI1)) {
-          if (!intereferes(MI1, MI2)) {
+          if (!interferes(MI1, MI2)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI1->getParent()) << " " << *MI1);
-            MI1->eraseFromParent();
-            Defs.erase(I1++);
+            MergedInstrs.insert(MI1);
             Changed = true;
+            ++I1;
             break;
           }
         } else {
@@ -552,8 +566,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
             continue;
           }
 
-          MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
-          if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
+          MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII);
+          if (!interferes(MI1, I) && !interferes(MI2, I)) {
             LLVM_DEBUG(dbgs()
                        << "Erasing from "
                        << printMBBReference(*MI1->getParent()) << " " << *MI1
@@ -561,9 +575,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
                        << printMBBReference(*MI2->getParent()) << " to "
                        << printMBBReference(*I->getParent()) << " " << *MI2);
             I->getParent()->splice(I, MI2->getParent(), MI2);
-            MI1->eraseFromParent();
-            Defs.erase(I1++);
+            MergedInstrs.insert(MI1);
             Changed = true;
+            ++I1;
             break;
           }
         }
@@ -573,6 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
     }
   }
 
+  for (MachineInstr *MI : MergedInstrs)
+    MI->eraseFromParent(); // Unlink and free each merged init.
+
   if (Changed)
     MRI.clearKillFlags(Reg);
 
@@ -723,7 +740,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
   }
 
   if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
-    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
+    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
 
   return true;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/merge-m0.mir b/llvm/test/CodeGen/AMDGPU/merge-m0.mir
index 73a6b13685020..bafbce05a8876 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-m0.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-m0.mir
@@ -47,13 +47,7 @@
 # GCN-NEXT: DS_WRITE_B32
 
 ---
-name:            test
-alignment:       0
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-tracksRegLiveness: true
+name:            merge-m0-many-init
 registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
@@ -129,3 +123,103 @@ body:             |
     S_BRANCH %bb.0.entry
 
 ...
+
+# GCN:    bb.0.entry:
+# GCN:      SI_INIT_M0 65536
+# GCN-NEXT: DS_WRITE_B32
+
+#GCN:     bb.1:
+#GCN-NOT:   SI_INIT_M0 65536
+#GCN-NOT:   SI_INIT_M0 -1
+
+#GCN:     bb.2:
+#GCN:       SI_INIT_M0 -1
+
+#GCN:     bb.3:
+#GCN:       SI_INIT_M0 -1
+
+---
+name:            merge-m0-dont-hoist-past-init-with-different-initializer
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1
+
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    SI_INIT_M0 65536, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2, %bb.3
+
+    SI_INIT_M0 65536, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_CBRANCH_VCCZ %bb.2, implicit undef $vcc
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors: %bb.4
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.3:
+    successors: %bb.4
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_ENDPGM 0
+...
+
+# GCN:    bb.0.entry:
+# GCN-NOT:  SI_INIT_M0
+# GCN:      S_OR_B64
+# GCN-NEXT: SI_INIT_M0
+
+#GCN:     bb.1:
+#GCN-NOT:   SI_INIT_M0 -1
+
+#GCN:     bb.2:
+#GCN-NOT:   SI_INIT_M0 -1
+
+---
+name:            merge-m0-after-prologue
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+body:             |
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr0_sgpr1
+
+    $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.3
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    SI_INIT_M0 -1, implicit-def $m0
+    DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_ENDPGM 0
+...

From 8528b1951cc6fda2c7bb09b259cf81f222550f82 Mon Sep 17 00:00:00 2001
From: Matt Morehouse <mascasa@google.com>
Date: Mon, 15 Jul 2019 22:07:56 +0000
Subject: [PATCH 168/451] [ASan] Fix >80 character line.

llvm-svn: 366136
---
 compiler-rt/lib/asan/asan_malloc_win.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/asan/asan_malloc_win.cc b/compiler-rt/lib/asan/asan_malloc_win.cc
index ccbce061daf60..5fad55d6e2840 100644
--- a/compiler-rt/lib/asan/asan_malloc_win.cc
+++ b/compiler-rt/lib/asan/asan_malloc_win.cc
@@ -342,7 +342,8 @@ void *SharedReAlloc(ReAllocFunction reallocFunc, SizeFunction heapSizeFunc,
       size_t old_usable_size = 0;
       if (replacement_alloc) {
         old_usable_size = asan_malloc_usable_size(lpMem, pc, bp);
-        REAL(memcpy)(replacement_alloc, lpMem, Min<size_t>(dwBytes, old_usable_size));
+        REAL(memcpy)(replacement_alloc, lpMem,
+                     Min<size_t>(dwBytes, old_usable_size));
         asan_free(lpMem, &stack, FROM_MALLOC);
       }
       return replacement_alloc;

From 5076038bb00dd5643cb4c0a881629126b359c828 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Mon, 15 Jul 2019 22:11:28 +0000
Subject: [PATCH 169/451] [DirectoryWatcher][NFC][test] Add typedef for enum

llvm-svn: 366137
---
 .../DirectoryWatcher/DirectoryWatcherTest.cpp | 45 ++++++++++---------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
index 72bc86d4493cf..9b0e1077abce0 100644
--- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
+++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
@@ -32,6 +32,8 @@ static bool operator==(const DirectoryWatcher::Event &lhs,
 
 namespace {
 
+typedef DirectoryWatcher::Event::EventKind EventKind;
+
 struct DirectoryWatcherTestFixture {
   std::string TestRootDir;
   std::string TestWatchedDir;
@@ -81,15 +83,15 @@ struct DirectoryWatcherTestFixture {
   }
 };
 
-std::string eventKindToString(const DirectoryWatcher::Event::EventKind K) {
+std::string eventKindToString(const EventKind K) {
   switch (K) {
-  case DirectoryWatcher::Event::EventKind::Removed:
+  case EventKind::Removed:
     return "Removed";
-  case DirectoryWatcher::Event::EventKind::Modified:
+  case EventKind::Modified:
     return "Modified";
-  case DirectoryWatcher::Event::EventKind::WatchedDirRemoved:
+  case EventKind::WatchedDirRemoved:
     return "WatchedDirRemoved";
-  case DirectoryWatcher::Event::EventKind::WatcherGotInvalidated:
+  case EventKind::WatcherGotInvalidated:
     return "WatcherGotInvalidated";
   }
   llvm_unreachable("unknown event kind");
@@ -249,7 +251,6 @@ void checkEventualResultWithTimeout(VerifyingConsumer &TestConsumer) {
       !TestConsumer.result().hasValue())
     TestConsumer.printUnmetExpectations(llvm::outs());
 }
-
 } // namespace
 
 TEST(DirectoryWatcherTest, InitialScanSync) {
@@ -260,9 +261,9 @@ TEST(DirectoryWatcherTest, InitialScanSync) {
   fixture.addFile("c");
 
   VerifyingConsumer TestConsumer{
-      {{DirectoryWatcher::Event::EventKind::Modified, "a"},
-       {DirectoryWatcher::Event::EventKind::Modified, "b"},
-       {DirectoryWatcher::Event::EventKind::Modified, "c"}},
+      {{EventKind::Modified, "a"},
+       {EventKind::Modified, "b"},
+       {EventKind::Modified, "c"}},
       {}};
 
   auto DW = DirectoryWatcher::create(
@@ -284,9 +285,9 @@ TEST(DirectoryWatcherTest, InitialScanAsync) {
   fixture.addFile("c");
 
   VerifyingConsumer TestConsumer{
-      {{DirectoryWatcher::Event::EventKind::Modified, "a"},
-       {DirectoryWatcher::Event::EventKind::Modified, "b"},
-       {DirectoryWatcher::Event::EventKind::Modified, "c"}},
+      {{EventKind::Modified, "a"},
+       {EventKind::Modified, "b"},
+       {EventKind::Modified, "c"}},
       {}};
 
   auto DW = DirectoryWatcher::create(
@@ -305,9 +306,9 @@ TEST(DirectoryWatcherTest, AddFiles) {
 
   VerifyingConsumer TestConsumer{
       {},
-      {{DirectoryWatcher::Event::EventKind::Modified, "a"},
-       {DirectoryWatcher::Event::EventKind::Modified, "b"},
-       {DirectoryWatcher::Event::EventKind::Modified, "c"}}};
+      {{EventKind::Modified, "a"},
+       {EventKind::Modified, "b"},
+       {EventKind::Modified, "c"}}};
 
   auto DW = DirectoryWatcher::create(
       fixture.TestWatchedDir,
@@ -330,8 +331,8 @@ TEST(DirectoryWatcherTest, ModifyFile) {
   fixture.addFile("a");
 
   VerifyingConsumer TestConsumer{
-      {{DirectoryWatcher::Event::EventKind::Modified, "a"}},
-      {{DirectoryWatcher::Event::EventKind::Modified, "a"}}};
+      {{EventKind::Modified, "a"}},
+      {{EventKind::Modified, "a"}}};
 
   auto DW = DirectoryWatcher::create(
       fixture.TestWatchedDir,
@@ -359,8 +360,8 @@ TEST(DirectoryWatcherTest, DeleteFile) {
   fixture.addFile("a");
 
   VerifyingConsumer TestConsumer{
-      {{DirectoryWatcher::Event::EventKind::Modified, "a"}},
-      {{DirectoryWatcher::Event::EventKind::Removed, "a"}}};
+      {{EventKind::Modified, "a"}},
+      {{EventKind::Removed, "a"}}};
 
   auto DW = DirectoryWatcher::create(
       fixture.TestWatchedDir,
@@ -380,8 +381,8 @@ TEST(DirectoryWatcherTest, DeleteWatchedDir) {
 
   VerifyingConsumer TestConsumer{
       {},
-      {{DirectoryWatcher::Event::EventKind::WatchedDirRemoved, ""},
-       {DirectoryWatcher::Event::EventKind::WatcherGotInvalidated, ""}}};
+      {{EventKind::WatchedDirRemoved, ""},
+       {EventKind::WatcherGotInvalidated, ""}}};
 
   auto DW = DirectoryWatcher::create(
       fixture.TestWatchedDir,
@@ -400,7 +401,7 @@ TEST(DirectoryWatcherTest, InvalidatedWatcher) {
   DirectoryWatcherTestFixture fixture;
 
   VerifyingConsumer TestConsumer{
-      {}, {{DirectoryWatcher::Event::EventKind::WatcherGotInvalidated, ""}}};
+      {}, {{EventKind::WatcherGotInvalidated, ""}}};
 
   {
     auto DW = DirectoryWatcher::create(

From c5e7a3d710097efbe86e73c6422d76744288b105 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Mon, 15 Jul 2019 22:11:51 +0000
Subject: [PATCH 170/451] [DirectoryWatcher][test] Relax test assumptions

Workaround for FSEvents sometimes sending notifications for events that happened
before DirectoryWatcher was created.

This caused tests to be flaky on green dragon.

llvm-svn: 366138
---
 .../DirectoryWatcher/DirectoryWatcherTest.cpp | 64 ++++++-------------
 1 file changed, 20 insertions(+), 44 deletions(-)

diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
index 9b0e1077abce0..a6b48e5623675 100644
--- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
+++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp
@@ -264,7 +264,14 @@ TEST(DirectoryWatcherTest, InitialScanSync) {
       {{EventKind::Modified, "a"},
        {EventKind::Modified, "b"},
        {EventKind::Modified, "c"}},
-      {}};
+      {},
+      // We have to ignore these as it's a race between the test process,
+      // which is scanning the directory, and the kernel, which is sending
+      // notifications.
+      {{EventKind::Modified, "a"},
+       {EventKind::Modified, "b"},
+       {EventKind::Modified, "c"}}
+      };
 
   auto DW = DirectoryWatcher::create(
       fixture.TestWatchedDir,
@@ -288,7 +295,14 @@ TEST(DirectoryWatcherTest, InitialScanAsync) {
       {{EventKind::Modified, "a"},
        {EventKind::Modified, "b"},
        {EventKind::Modified, "c"}},
-      {}};
+      {},
+      // We have to ignore these as it's a race between the test process,
+      // which is scanning the directory, and the kernel, which is sending
+      // notifications.
+      {{EventKind::Modified, "a"},
+       {EventKind::Modified, "b"},
+       {EventKind::Modified, "c"}}
+       };
 
   auto DW = DirectoryWatcher::create(
       fixture.TestWatchedDir,
@@ -331,6 +345,7 @@ TEST(DirectoryWatcherTest, ModifyFile) {
   fixture.addFile("a");
 
   VerifyingConsumer TestConsumer{
+      {{EventKind::Modified, "a"}},
       {{EventKind::Modified, "a"}},
       {{EventKind::Modified, "a"}}};
 
@@ -361,7 +376,8 @@ TEST(DirectoryWatcherTest, DeleteFile) {
 
   VerifyingConsumer TestConsumer{
       {{EventKind::Modified, "a"}},
-      {{EventKind::Removed, "a"}}};
+      {{EventKind::Removed, "a"}},
+      {{EventKind::Modified, "a"}}};
 
   auto DW = DirectoryWatcher::create(
       fixture.TestWatchedDir,
@@ -414,44 +430,4 @@ TEST(DirectoryWatcherTest, InvalidatedWatcher) {
   } // DW is destructed here.
 
   checkEventualResultWithTimeout(TestConsumer);
-}
-
-TEST(DirectoryWatcherTest, ChangeMetadata) {
-  DirectoryWatcherTestFixture fixture;
-  fixture.addFile("a");
-
-  VerifyingConsumer TestConsumer{
-      {{DirectoryWatcher::Event::EventKind::Modified, "a"}},
-      // We don't expect any notification for file having access file changed.
-      {},
-      // Given the timing we are ok with receiving the duplicate event.
-      {{DirectoryWatcher::Event::EventKind::Modified, "a"}}};
-
-  auto DW = DirectoryWatcher::create(
-      fixture.TestWatchedDir,
-      [&TestConsumer](llvm::ArrayRef<DirectoryWatcher::Event> Events,
-                      bool IsInitial) {
-        TestConsumer.consume(Events, IsInitial);
-      },
-      /*waitForInitialSync=*/true);
-
-  { // Change access and modification time of file a.
-    Expected<file_t> HopefullyTheFD = llvm::sys::fs::openNativeFileForWrite(
-        fixture.getPathInWatched("a"), CD_OpenExisting, OF_None);
-    if (!HopefullyTheFD) {
-      llvm::outs() << HopefullyTheFD.takeError();
-    }
-
-    const int FD = HopefullyTheFD.get();
-    const TimePoint<> NewTimePt =
-        std::chrono::system_clock::now() - std::chrono::minutes(1);
-#ifndef NDEBUG
-    std::error_code setTimeRes =
-#endif
-        llvm::sys::fs::setLastAccessAndModificationTime(FD, NewTimePt,
-                                                        NewTimePt);
-    assert(!setTimeRes);
-  }
-
-  checkEventualResultWithTimeout(TestConsumer);
-}
+}

From 292e21d8bce463fb8bb833810f05d4382f61bdd0 Mon Sep 17 00:00:00 2001
From: Wouter van Oortmerssen <aardappel@gmail.com>
Date: Mon, 15 Jul 2019 22:13:39 +0000
Subject: [PATCH 171/451] [WebAssembly] Assembler: support special floats:
 infinity / nan

Summary:
These are emitted as identifiers by the InstPrinter, so we should
parse them as such. These could potentially clash with symbols of
the same name, but that is out of our (the WebAssembly backend) control.

Reviewers: dschuff

Subscribers: sbc100, jgravelle-google, aheejin, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64770

llvm-svn: 366139
---
 .../AsmParser/WebAssemblyAsmParser.cpp        | 25 +++++++++++++++++++
 llvm/test/MC/WebAssembly/basic-assembly.s     |  4 +++
 2 files changed, 29 insertions(+)

diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index e82923519f4c1..e9a7f6977c2d3 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -363,6 +363,28 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
     return false;
   }
 
+  // Parses an "infinity" or "nan" identifier (case-insensitive) as a float
+  // operand. Returns false when an operand was added, true otherwise.
+  bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) {
+    if (Lexer.isNot(AsmToken::Identifier))
+      return true;
+    auto &Tok = Lexer.getTok();
+    auto Name = Tok.getString();
+    double Special;
+    if (Name.compare_lower("infinity") == 0)
+      Special = std::numeric_limits<double>::infinity();
+    else if (Name.compare_lower("nan") == 0)
+      Special = std::numeric_limits<double>::quiet_NaN();
+    else
+      return true;
+    Operands.push_back(make_unique<WebAssemblyOperand>(
+        WebAssemblyOperand::Float, Tok.getLoc(), Tok.getEndLoc(),
+        WebAssemblyOperand::FltOp{IsNegative ? -Special : Special}));
+    // The token is only consumed once it is known to be a special float.
+    Parser.Lex();
+    return false;
+  }
+
   bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
     // FIXME: there is probably a cleaner way to do this.
     auto IsLoadStore = InstName.find(".load") != StringRef::npos ||
@@ -476,6 +498,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
       auto &Tok = Lexer.getTok();
       switch (Tok.getKind()) {
       case AsmToken::Identifier: {
+        if (!parseSpecialFloatMaybe(false, Operands))
+          break;
         auto &Id = Lexer.getTok();
         if (ExpectBlockType) {
           // Assume this identifier is a block_type.
@@ -507,6 +531,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
         } else if(Lexer.is(AsmToken::Real)) {
           if (parseSingleFloat(true, Operands))
             return true;
+        } else if (!parseSpecialFloatMaybe(true, Operands)) {
         } else {
           return error("Expected numeric constant instead got: ",
                        Lexer.getTok());
diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s
index 81d6001175b65..c3b7e9da25de4 100644
--- a/llvm/test/MC/WebAssembly/basic-assembly.s
+++ b/llvm/test/MC/WebAssembly/basic-assembly.s
@@ -14,6 +14,8 @@ test0:
     i32.const   -1
     f64.const   0x1.999999999999ap1
     f32.const   -1.0
+    f32.const   -infinity
+    f32.const   nan
     v128.const  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
     v128.const  0, 1, 2, 3, 4, 5, 6, 7
     # Indirect addressing:
@@ -118,6 +120,8 @@ test0:
 # CHECK-NEXT:      i32.const   -1
 # CHECK-NEXT:      f64.const   0x1.999999999999ap1
 # CHECK-NEXT:      f32.const   -0x1p0
+# CHECK-NEXT:      f32.const   -infinity
+# CHECK-NEXT:      f32.const   nan
 # CHECK-NEXT:      v128.const  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 # CHECK-NEXT:      v128.const  0, 1, 2, 3, 4, 5, 6, 7
 # CHECK-NEXT:      local.get   0

From 474009eaea76ad18d66bd620a0044afbcf812dd9 Mon Sep 17 00:00:00 2001
From: Heejin Ahn <aheejin@gmail.com>
Date: Mon, 15 Jul 2019 22:22:10 +0000
Subject: [PATCH 172/451] [WebAssembly] Simplify regcopy.mir

Summary:
This deletes the ll templates from the functions because they don't need
them (mir files need ll templates only when they have function calls or
BB names that are not numbers).

This also renames the file to `reg-copy.mir`, because I'm planning
to add some more `reg-*.mir` files soon.

Reviewers: tlively

Subscribers: dschuff, sbc100, jgravelle-google, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64704

llvm-svn: 366140
---
 .../WebAssembly/{regcopy.mir => reg-copy.mir} | 33 +++----------------
 1 file changed, 5 insertions(+), 28 deletions(-)
 rename llvm/test/CodeGen/WebAssembly/{regcopy.mir => reg-copy.mir} (69%)

diff --git a/llvm/test/CodeGen/WebAssembly/regcopy.mir b/llvm/test/CodeGen/WebAssembly/reg-copy.mir
similarity index 69%
rename from llvm/test/CodeGen/WebAssembly/regcopy.mir
rename to llvm/test/CodeGen/WebAssembly/reg-copy.mir
index 5115cde6d240b..0a362699b8143 100644
--- a/llvm/test/CodeGen/WebAssembly/regcopy.mir
+++ b/llvm/test/CodeGen/WebAssembly/reg-copy.mir
@@ -1,32 +1,9 @@
-# RUN: llc %s -o - -run-pass=postrapseudos | FileCheck %s
---- |
-  target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-  target triple = "wasm32-unknown-unknown"
+# RUN: llc -mtriple=wasm32-unknown-unknown %s -o - -run-pass=postrapseudos | FileCheck %s
 
-  define void @copy_i32() {
-    ret void
-  }
-
-  define void @copy_i64() {
-    ret void
-  }
-
-  define void @copy_f32() {
-    ret void
-  }
-
-  define void @copy_f64() {
-    ret void
-  }
-
-  define void @copy_v128() {
-    ret void
-  }
-...
 ---
 name: copy_i32
 # CHECK-LABEL: copy_i32
-body:             |
+body: |
   ; CHECK-LABEL: bb.0:
   ; CHECK-NEXT: %0:i32 = COPY_I32 %1:i32
   ; CHECK-NEXT: RETURN_VOID
@@ -48,7 +25,7 @@ body:             |
 ---
 name: copy_f32
 # CHECK-LABEL: copy_f32
-body:             |
+body: |
   ; CHECK-LABEL: bb.0:
   ; CHECK-NEXT: %0:f32 = COPY_F32 %1:f32
   ; CHECK-NEXT: RETURN_VOID
@@ -59,7 +36,7 @@ body:             |
 ---
 name: copy_f64
 # CHECK-LABEL: copy_f64
-body:             |
+body: |
   ; CHECK-LABEL: bb.0:
   ; CHECK-NEXT: %0:f64 = COPY_F64 %1:f64
   ; CHECK-NEXT: RETURN_VOID
@@ -70,7 +47,7 @@ body:             |
 ---
 name: copy_v128
 # CHECK-LABEL: copy_v128
-body:             |
+body: |
   ; CHECK-LABEL: bb.0:
   ; CHECK-NEXT: %0:v128 = COPY_V128 %1:v128
   ; CHECK-NEXT: RETURN_VOID

From 853813293aceafbca6a71ded2e95e83bd3a9027d Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Mon, 15 Jul 2019 22:27:57 +0000
Subject: [PATCH 173/451] [clang-fuzzer] Remove
 'setUseOrcMCJITReplacement(false)' call.

The default value for this option (UseMCJITReplacement) is already false, and
OrcMCJITReplacement is going to have deprecation warnings attached in LLVM 9.0.
Removing this call removes a spurious warning.

llvm-svn: 366141
---
 clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp
index d0d35d51e80e5..ba2dff0af8a93 100644
--- a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp
+++ b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp
@@ -159,7 +159,6 @@ static void CreateAndRunJITFunc(const std::string &IR, CodeGenOpt::Level OLvl) {
   builder.setMAttrs(getFeatureList());
   builder.setErrorStr(&ErrorMsg);
   builder.setEngineKind(EngineKind::JIT);
-  builder.setUseOrcMCJITReplacement(false);
   builder.setMCJITMemoryManager(make_unique<SectionMemoryManager>());
   builder.setOptLevel(OLvl);
   builder.setTargetOptions(InitTargetOptionsFromCodeGenFlags());

From 67cee1dc7ee285b03372eb818a3894d35efa7394 Mon Sep 17 00:00:00 2001
From: Shoaib Meenai <smeenai@fb.com>
Date: Mon, 15 Jul 2019 22:29:30 +0000
Subject: [PATCH 174/451] [llvm-lipo] Implement -create (with hardcoded
 alignments)

Creates a universal binary output file from the input files. Alignment
values are currently hard-coded; the alignment calculation will be
implemented once the create functionality has been approved.

Patch by Anusha Basana <anusha.basana@gmail.com>

Differential Revision: https://reviews.llvm.org/D64102

llvm-svn: 366142
---
 .../tools/llvm-lipo/Inputs/arm64-slice.yaml   | 101 ++++++++
 .../tools/llvm-lipo/Inputs/armv7-slice.yaml   |  76 ++++++
 .../tools/llvm-lipo/Inputs/x86_64-slice.yaml  |  89 +++++++
 .../tools/llvm-lipo/create-executable.test    |  11 +
 .../tools/llvm-lipo/create-invalid-input.test |   8 +
 .../llvm-lipo/create-without-alignment.test   |  32 +++
 .../thin-executable-universal-binary.test     |   2 +-
 llvm/tools/llvm-lipo/LipoOpts.td              |   5 +
 llvm/tools/llvm-lipo/llvm-lipo.cpp            | 222 ++++++++++++++++--
 9 files changed, 527 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml
 create mode 100644 llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml
 create mode 100644 llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml
 create mode 100644 llvm/test/tools/llvm-lipo/create-executable.test
 create mode 100644 llvm/test/tools/llvm-lipo/create-invalid-input.test
 create mode 100644 llvm/test/tools/llvm-lipo/create-without-alignment.test

diff --git a/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml
new file mode 100644
index 0000000000000..5dfd45027381e
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml
@@ -0,0 +1,101 @@
+--- !mach-o
+FileHeader:      
+  magic:           0xFEEDFACF
+  cputype:         0x0100000C
+  cpusubtype:      0x00000000
+  filetype:        0x00000001
+  ncmds:           4
+  sizeofcmds:      352
+  flags:           0x00002000
+  reserved:        0x00000000
+LoadCommands:    
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         232
+    segname:         ''
+    vmaddr:          0
+    vmsize:          56
+    fileoff:         384
+    filesize:        56
+    maxprot:         7
+    initprot:        7
+    nsects:          2
+    flags:           0
+    Sections:        
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000000000000
+        size:            20
+        offset:          0x00000180
+        align:           2
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+      - sectname:        __compact_unwind
+        segname:         __LD
+        addr:            0x0000000000000018
+        size:            32
+        offset:          0x00000198
+        align:           3
+        reloff:          0x000001B8
+        nreloc:          1
+        flags:           0x02000000
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+  - cmd:             LC_VERSION_MIN_IPHONEOS
+    cmdsize:         16
+    version:         327680
+    sdk:             0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          448
+    nsyms:           3
+    stroff:          496
+    strsize:         20
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       2
+    iextdefsym:      2
+    nextdefsym:      1
+    iundefsym:       3
+    nundefsym:       0
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+LinkEditData:    
+  NameList:        
+    - n_strx:          13
+      n_type:          0x0E
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+    - n_strx:          7
+      n_type:          0x0E
+      n_sect:          2
+      n_desc:          0
+      n_value:         24
+    - n_strx:          1
+      n_type:          0x0F
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+  StringTable:     
+    - ''
+    - _main
+    - ltmp1
+    - ltmp0
+    - ''
+...
diff --git a/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml
new file mode 100644
index 0000000000000..b26062931458c
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml
@@ -0,0 +1,76 @@
+--- !mach-o
+FileHeader:      
+  magic:           0xFEEDFACE
+  cputype:         0x0000000C
+  cpusubtype:      0x00000009
+  filetype:        0x00000001
+  ncmds:           4
+  sizeofcmds:      244
+  flags:           0x00002000
+LoadCommands:    
+  - cmd:             LC_SEGMENT
+    cmdsize:         124
+    segname:         ''
+    vmaddr:          0
+    vmsize:          10
+    fileoff:         272
+    filesize:        10
+    maxprot:         7
+    initprot:        7
+    nsects:          1
+    flags:           0
+    Sections:        
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000000000000
+        size:            10
+        offset:          0x00000110
+        align:           1
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+  - cmd:             LC_VERSION_MIN_IPHONEOS
+    cmdsize:         16
+    version:         327680
+    sdk:             0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          284
+    nsyms:           1
+    stroff:          296
+    strsize:         8
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       0
+    iextdefsym:      0
+    nextdefsym:      1
+    iundefsym:       1
+    nundefsym:       0
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+LinkEditData:    
+  NameList:        
+    - n_strx:          1
+      n_type:          0x0F
+      n_sect:          1
+      n_desc:          8
+      n_value:         0
+  StringTable:     
+    - ''
+    - _main
+    - ''
+...
diff --git a/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml
new file mode 100644
index 0000000000000..27db6d7a13157
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml
@@ -0,0 +1,89 @@
+--- !mach-o
+FileHeader:      
+  magic:           0xFEEDFACF
+  cputype:         0x01000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000001
+  ncmds:           4
+  sizeofcmds:      352
+  flags:           0x00002000
+  reserved:        0x00000000
+LoadCommands:    
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         232
+    segname:         ''
+    vmaddr:          0
+    vmsize:          80
+    fileoff:         384
+    filesize:        80
+    maxprot:         7
+    initprot:        7
+    nsects:          2
+    flags:           0
+    Sections:        
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000000000000
+        size:            15
+        offset:          0x00000180
+        align:           4
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+      - sectname:        __eh_frame
+        segname:         __TEXT
+        addr:            0x0000000000000010
+        size:            64
+        offset:          0x00000190
+        align:           3
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x6800000B
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+  - cmd:             LC_VERSION_MIN_MACOSX
+    cmdsize:         16
+    version:         656384
+    sdk:             0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          464
+    nsyms:           1
+    stroff:          480
+    strsize:         8
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       0
+    iextdefsym:      0
+    nextdefsym:      1
+    iundefsym:       1
+    nundefsym:       0
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+LinkEditData:    
+  NameList:        
+    - n_strx:          1
+      n_type:          0x0F
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+  StringTable:     
+    - ''
+    - _main
+    - ''
+...
diff --git a/llvm/test/tools/llvm-lipo/create-executable.test b/llvm/test/tools/llvm-lipo/create-executable.test
new file mode 100644
index 0000000000000..82aa69cfec521
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/create-executable.test
@@ -0,0 +1,11 @@
+# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-i386.o
+# RUN: yaml2obj %p/Inputs/x86_64-slice.yaml > %t-x86_64.o
+
+# RUN: chmod -x %t-i386.o
+# RUN: chmod -x %t-x86_64.o
+# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal.o
+# RUN: ! test -x %t-universal.o
+
+# RUN: chmod +x %t-i386.o
+# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal.o
+# RUN: test -x %t-universal.o
diff --git a/llvm/test/tools/llvm-lipo/create-invalid-input.test b/llvm/test/tools/llvm-lipo/create-invalid-input.test
new file mode 100644
index 0000000000000..4bb2e1a566679
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/create-invalid-input.test
@@ -0,0 +1,8 @@
+# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-32.o
+# RUN: yaml2obj %p/Inputs/i386-x86_64-universal.yaml > %t-universal.o
+
+# RUN: not llvm-lipo %t-32.o -create 2>&1 | FileCheck --check-prefix=NO_OUTPUT %s
+# NO_OUTPUT: error: create expects a single output file to be specified
+
+# RUN: not llvm-lipo %t-universal.o %t-32.o -create -output %t.o 2>&1 | FileCheck --check-prefix=DUPLICATE_ARCHS %s
+# DUPLICATE_ARCHS: have the same architecture i386 and therefore cannot be in the same universal binary
diff --git a/llvm/test/tools/llvm-lipo/create-without-alignment.test b/llvm/test/tools/llvm-lipo/create-without-alignment.test
new file mode 100644
index 0000000000000..813230a7e3c4a
--- /dev/null
+++ b/llvm/test/tools/llvm-lipo/create-without-alignment.test
@@ -0,0 +1,32 @@
+# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-i386.o
+# RUN: yaml2obj %p/Inputs/x86_64-slice.yaml > %t-x86_64.o
+
+# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal-llvm.o
+
+# RUN: yaml2obj %p/Inputs/i386-x86_64-universal.yaml > %t-universal.o
+# RUN: cmp %t-universal-llvm.o %t-universal.o
+
+# RUN: yaml2obj %p/Inputs/armv7-slice.yaml > %t-armv7.o
+# RUN: yaml2obj %p/Inputs/arm64-slice.yaml > %t-arm64.o
+
+# RUN: llvm-lipo %t-arm64.o %t-armv7.o %t-universal.o -create -output %t-universal-2.o
+# RUN: llvm-lipo %t-universal-2.o -thin x86_64 -output %t-x86_64_extracted.o
+# RUN: cmp %t-x86_64_extracted.o %t-x86_64.o
+# RUN: llvm-lipo %t-universal-2.o -thin armv7 -output %t-armv7-extracted.o
+# RUN: cmp %t-armv7-extracted.o %t-armv7.o
+
+# RUN: llvm-objdump %t-universal-2.o -m --universal-headers | FileCheck %s
+# CHECK: fat_magic FAT_MAGIC
+# CHECK: nfat_arch 4
+# CHECK: architecture i386
+# CHECK: offset 4096
+# CHECK: align 2^12 (4096)
+# CHECK: architecture x86_64
+# CHECK: offset 8192
+# CHECK: align 2^12 (4096)
+# CHECK: architecture armv7
+# CHECK: offset 16384
+# CHECK: align 2^14 (16384)
+# CHECK: architecture arm64
+# CHECK: offset 32768
+# CHECK: align 2^14 (16384)
diff --git a/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test b/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test
index 3992373d9e46f..870252cd690ff 100644
--- a/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test
+++ b/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test
@@ -4,7 +4,7 @@
 
 # RUN: chmod -x %t-universal.o
 # RUN: llvm-lipo %t-universal.o -thin i386 -output %t32.o
-# RUN: test ! -x %t32.o
+# RUN: ! test -x %t32.o
 
 # RUN: chmod +x %t-universal.o
 # RUN: llvm-lipo %t-universal.o -thin i386 -output %t32-ex.o
diff --git a/llvm/tools/llvm-lipo/LipoOpts.td b/llvm/tools/llvm-lipo/LipoOpts.td
index e3cbe2dfa8e45..e2a73768733eb 100644
--- a/llvm/tools/llvm-lipo/LipoOpts.td
+++ b/llvm/tools/llvm-lipo/LipoOpts.td
@@ -23,6 +23,11 @@ def thin : Option<["-", "--"], "thin", KIND_SEPARATE>,
            HelpText<"Create a thin output file of specified arch_type from the "
                     "fat input file. Requires -output option">;
 
+def create : Option<["-", "--"], "create", KIND_FLAG>,
+             Group<action_group>,
+             HelpText<"Create a universal binary output file from the input "
+                      "files. Requires -output option">;
+
 def output : Option<["-", "--"], "output", KIND_SEPARATE>,
              HelpText<"Create output file with specified name">;
 def o : JoinedOrSeparate<["-"], "o">, Alias<output>;
diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp
index ea0d427e01997..65135bec951c3 100644
--- a/llvm/tools/llvm-lipo/llvm-lipo.cpp
+++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp
@@ -80,6 +80,7 @@ enum class LipoAction {
   PrintArchs,
   VerifyArch,
   ThinArch,
+  CreateUniversal,
 };
 
 struct Config {
@@ -90,6 +91,14 @@ struct Config {
   LipoAction ActionToPerform;
 };
 
+struct Slice {
+  const MachOObjectFile *ObjectFile;
+  // Alignment is the log2 of the slice's byte alignment (e.g. 12 for 4096).
+  // It is stored here so values read from universal inputs are preserved and so
+  // compareSlices can order the slices before the output size is calculated.
+  uint32_t Alignment;
+};
+
 } // end namespace
 
 static void validateArchitectureName(StringRef ArchitectureName) {
@@ -108,7 +117,7 @@ static Config parseLipoOptions(ArrayRef<const char *> ArgsArr) {
   Config C;
   LipoOptTable T;
   unsigned MissingArgumentIndex, MissingArgumentCount;
-  llvm::opt::InputArgList InputArgs =
+  opt::InputArgList InputArgs =
       T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
 
   if (MissingArgumentCount)
@@ -186,6 +195,12 @@ static Config parseLipoOptions(ArrayRef<const char *> ArgsArr) {
     C.ActionToPerform = LipoAction::ThinArch;
     return C;
 
+  case LIPO_create:
+    if (C.OutputFile.empty())
+      reportError("create expects a single output file to be specified");
+    C.ActionToPerform = LipoAction::CreateUniversal;
+    return C;
+
   default:
     reportError("llvm-lipo action unspecified");
   }
@@ -195,8 +210,7 @@ static SmallVector<OwningBinary<Binary>, 1>
 readInputBinaries(ArrayRef<std::string> InputFiles) {
   SmallVector<OwningBinary<Binary>, 1> InputBinaries;
   for (StringRef InputFile : InputFiles) {
-    Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
-        createBinary(InputFile);
+    Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(InputFile);
     if (!BinaryOrErr)
       reportError(InputFile, BinaryOrErr.takeError());
     // TODO: Add compatibility for archive files
@@ -241,33 +255,35 @@ static void verifyArch(ArrayRef<OwningBinary<Binary>> InputBinaries,
   exit(EXIT_SUCCESS);
 }
 
-static void printArchOrUnknown(const MachOObjectFile *ObjectFile) {
-  // Prints trailing space and unknown in this format for compatibility with
-  // cctools lipo.
-  const std::string ObjectArch = ObjectFile->getArchTriple().getArchName();
-  if (ObjectArch.empty())
-    outs() << "unknown(" << ObjectFile->getHeader().cputype << ","
-           << ObjectFile->getHeader().cpusubtype << ") ";
-  else
-    outs() << ObjectArch + " ";
+// Returns the architecture name of the given object file. Architectures with
+// no known name are formatted as unknown(CPUType,CPUSubType), with the
+// CPU_SUBTYPE_MASK bits cleared, for compatibility with cctools lipo.
+static std::string getArchString(const MachOObjectFile &ObjectFile) {
+  const Triple T = ObjectFile.getArchTriple();
+  const StringRef ObjectArch = T.getArchName();
+  if (!ObjectArch.empty())
+    return ObjectArch;
+  return ("unknown(" + Twine(ObjectFile.getHeader().cputype) + "," +
+          Twine(ObjectFile.getHeader().cpusubtype & ~MachO::CPU_SUBTYPE_MASK) +
+          ")")
+      .str();
 }
 
 LLVM_ATTRIBUTE_NORETURN
 static void printArchs(ArrayRef<OwningBinary<Binary>> InputBinaries) {
+  // Prints trailing space for compatibility with cctools lipo.
   assert(InputBinaries.size() == 1 && "Incorrect number of input binaries");
   const Binary *InputBinary = InputBinaries.front().getBinary();
   if (auto UO = dyn_cast<MachOUniversalBinary>(InputBinary)) {
-    for (MachOUniversalBinary::object_iterator I = UO->begin_objects(),
-                                               E = UO->end_objects();
-         I != E; ++I) {
+    for (const auto &O : UO->objects()) {
       Expected<std::unique_ptr<MachOObjectFile>> BinaryOrError =
-          I->getAsObjectFile();
+          O.getAsObjectFile();
       if (!BinaryOrError)
         reportError(InputBinary->getFileName(), BinaryOrError.takeError());
-      printArchOrUnknown(BinaryOrError.get().get());
+      outs() << getArchString(*BinaryOrError.get().get()) << " ";
     }
   } else if (auto O = dyn_cast<MachOObjectFile>(InputBinary)) {
-    printArchOrUnknown(O);
+    outs() << getArchString(*O) << " ";
   } else {
     llvm_unreachable("Unexpected binary format");
   }
@@ -314,6 +330,173 @@ static void extractSlice(ArrayRef<OwningBinary<Binary>> InputBinaries,
   exit(EXIT_SUCCESS);
 }
 
+static void checkArchDuplicates(const ArrayRef<Slice> &Slices) {
+  DenseMap<uint64_t, const MachOObjectFile *> CPUIds;
+  auto CPUIDForSlice = [](const Slice &S) {
+    return static_cast<uint64_t>(S.ObjectFile->getHeader().cputype) << 32 |
+           S.ObjectFile->getHeader().cpusubtype;
+  };
+  for (const auto &S : Slices) {
+    auto Entry = CPUIds.try_emplace(CPUIDForSlice(S), S.ObjectFile);
+    if (!Entry.second)
+      reportError(Entry.first->second->getFileName() + " and " +
+                  S.ObjectFile->getFileName() + " have the same architecture " +
+                  getArchString(*S.ObjectFile) +
+                  " and therefore cannot be in the same universal binary");
+  }
+}
+
+static uint32_t calculateAlignment(const MachOObjectFile *ObjectFile) {
+  // Returns a hard-coded log2 alignment chosen from the object's cputype.
+  // TODO: Implement getAlign() and remove the hard-coding in a follow-up.
+
+  switch (ObjectFile->getHeader().cputype) {
+  case MachO::CPU_TYPE_I386:
+  case MachO::CPU_TYPE_X86_64:
+  case MachO::CPU_TYPE_POWERPC:
+  case MachO::CPU_TYPE_POWERPC64:
+    return 12; // log2 of the 4k page size for x86 and PPC
+  case MachO::CPU_TYPE_ARM:
+  case MachO::CPU_TYPE_ARM64:
+  case MachO::CPU_TYPE_ARM64_32:
+    return 14; // log2 of the 16k page size for Darwin ARM
+  default:
+    return 12; // any other cputype gets the same value as x86 and PPC
+  }
+}
+
+// Replicates cctools lipo ordering; equal cputypes sort by cpusubtype.
+static bool compareSlices(const Slice &Lhs, const Slice &Rhs) {
+  if (Lhs.ObjectFile->getHeader().cputype ==
+      Rhs.ObjectFile->getHeader().cputype)
+    return Lhs.ObjectFile->getHeader().cpusubtype <
+           Rhs.ObjectFile->getHeader().cpusubtype;
+
+  // Force CPU_TYPE_ARM64 slices to follow all other slices for compatibility
+  // with cctools lipo.
+  if (Lhs.ObjectFile->getHeader().cputype == MachO::CPU_TYPE_ARM64)
+    return false;
+  if (Rhs.ObjectFile->getHeader().cputype == MachO::CPU_TYPE_ARM64)
+    return true;
+
+  // Otherwise sort by alignment to minimize file size.
+  return Lhs.Alignment < Rhs.Alignment;
+}
+
+// Builds one Slice per input object. Objects extracted from universal binaries
+// are moved into ExtractedObjects, which owns them on behalf of the Slices.
+static SmallVector<Slice, 2> buildSlices(
+    ArrayRef<OwningBinary<Binary>> InputBinaries,
+    SmallVectorImpl<std::unique_ptr<MachOObjectFile>> &ExtractedObjects) {
+  SmallVector<Slice, 2> Slices;
+  for (auto &IB : InputBinaries) {
+    const Binary *InputBinary = IB.getBinary();
+    if (auto UO = dyn_cast<MachOUniversalBinary>(InputBinary)) {
+      for (const auto &O : UO->objects()) {
+        Expected<std::unique_ptr<MachOObjectFile>> BinaryOrError =
+            O.getAsObjectFile();
+        if (!BinaryOrError)
+          reportError(InputBinary->getFileName(), BinaryOrError.takeError());
+        ExtractedObjects.push_back(std::move(BinaryOrError.get()));
+        Slices.push_back(Slice{ExtractedObjects.back().get(), O.getAlign()});
+      }
+    } else if (auto O = dyn_cast<MachOObjectFile>(InputBinary)) {
+      Slices.push_back(Slice{O, calculateAlignment(O)});
+    } else {
+      llvm_unreachable("Unexpected binary format");
+    }
+  }
+  return Slices;
+}
+
+// Computes the fat_arch entry for every slice, placing each slice at the next
+// offset aligned to 2^Alignment past the fat header and the fat_arch table.
+static SmallVector<MachO::fat_arch, 2>
+buildFatArchList(ArrayRef<Slice> Slices) {
+  SmallVector<MachO::fat_arch, 2> FatArchList;
+  uint64_t Offset =
+      sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch);
+  for (const Slice &S : Slices) {
+    // Shift a 64-bit one so alignments of 2^31 and above do not overflow int.
+    Offset = alignTo(Offset, 1ull << S.Alignment);
+    if (Offset > UINT32_MAX)
+      reportError("fat file too large to be created because the offset "
+                  "field in struct fat_arch is only 32-bits and the offset " +
+                  Twine(Offset) + " for " + S.ObjectFile->getFileName() +
+                  " for architecture " + getArchString(*S.ObjectFile) +
+                  " exceeds that.");
+    MachO::fat_arch FatArch;
+    FatArch.cputype = S.ObjectFile->getHeader().cputype;
+    FatArch.cpusubtype = S.ObjectFile->getHeader().cpusubtype;
+    FatArch.offset = Offset;
+    FatArch.size = S.ObjectFile->getMemoryBufferRef().getBufferSize();
+    FatArch.align = S.Alignment;
+    Offset += FatArch.size;
+    FatArchList.push_back(FatArch);
+  }
+  return FatArchList;
+}
+
+static void createUniversalBinary(SmallVectorImpl<Slice> &Slices,
+                                  StringRef OutputFileName) {
+  MachO::fat_header FatHeader;
+  FatHeader.magic = MachO::FAT_MAGIC;
+  FatHeader.nfat_arch = Slices.size();
+  // Sort first: FatArchList[Index] below must correspond to Slices[Index].
+  stable_sort(Slices, compareSlices);
+  SmallVector<MachO::fat_arch, 2> FatArchList = buildFatArchList(Slices);
+  // The output is marked executable if any input file is executable.
+  const bool IsExecutable = any_of(Slices, [](Slice S) {
+    return sys::fs::can_execute(S.ObjectFile->getFileName());
+  });
+  const uint64_t OutputFileSize =
+      FatArchList.back().offset + FatArchList.back().size;
+  Expected<std::unique_ptr<FileOutputBuffer>> OutFileOrError =
+      FileOutputBuffer::create(OutputFileName, OutputFileSize,
+                               IsExecutable ? FileOutputBuffer::F_executable
+                                            : 0);
+  if (!OutFileOrError)
+    reportError(OutputFileName, OutFileOrError.takeError());
+  std::unique_ptr<FileOutputBuffer> OutFile = std::move(OutFileOrError.get());
+  std::memset(OutFile->getBufferStart(), 0, OutputFileSize);
+  // Byte-swap the header on little-endian hosts before writing it at offset 0.
+  if (sys::IsLittleEndianHost)
+    MachO::swapStruct(FatHeader);
+  std::memcpy(OutFile->getBufferStart(), &FatHeader, sizeof(MachO::fat_header));
+  // Copy each slice's bytes to the offset recorded in its fat_arch entry.
+  for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) {
+    MemoryBufferRef BufferRef = Slices[Index].ObjectFile->getMemoryBufferRef();
+    std::copy(BufferRef.getBufferStart(), BufferRef.getBufferEnd(),
+              OutFile->getBufferStart() + FatArchList[Index].offset);
+  }
+
+  // FatArchs written after Slices in order to reduce the number of swaps for
+  // the LittleEndian case.
+  if (sys::IsLittleEndianHost)
+    for (MachO::fat_arch &FA : FatArchList)
+      MachO::swapStruct(FA);
+  std::memcpy(OutFile->getBufferStart() + sizeof(MachO::fat_header),
+              FatArchList.begin(),
+              sizeof(MachO::fat_arch) * FatArchList.size());
+
+  if (Error E = OutFile->commit())
+    reportError(OutputFileName, std::move(E));
+}
+
+LLVM_ATTRIBUTE_NORETURN
+static void createUniversalBinary(ArrayRef<OwningBinary<Binary>> InputBinaries,
+                                  StringRef OutputFileName) {
+  assert(InputBinaries.size() >= 1 && "Incorrect number of input binaries");
+  assert(!OutputFileName.empty() && "Create expects a single output file");
+  // ExtractedObjects must outlive Slices, which hold raw pointers into it.
+  SmallVector<std::unique_ptr<MachOObjectFile>, 1> ExtractedObjects;
+  SmallVector<Slice, 1> Slices = buildSlices(InputBinaries, ExtractedObjects);
+  checkArchDuplicates(Slices);
+  createUniversalBinary(Slices, OutputFileName);
+  // Exit here rather than return: this function is declared noreturn.
+  exit(EXIT_SUCCESS);
+}
+
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   Config C = parseLipoOptions(makeArrayRef(argv + 1, argc));
@@ -330,6 +513,9 @@ int main(int argc, char **argv) {
   case LipoAction::ThinArch:
     extractSlice(InputBinaries, C.ThinArchType, C.OutputFile);
     break;
+  case LipoAction::CreateUniversal:
+    createUniversalBinary(InputBinaries, C.OutputFile);
+    break;
   }
   return EXIT_SUCCESS;
 }

From 4422cc4f1a910f790349d678a1c83bae5cd07ccb Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Mon, 15 Jul 2019 22:34:19 +0000
Subject: [PATCH 175/451] [OpenCL] Make TableGen'd builtin tables and helper
 functions static

Reviewers: Pierre, Anastasia

Reviewed By: Anastasia

Subscribers: yaxunl, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64608

llvm-svn: 366143
---
 clang/lib/Sema/SemaLookup.cpp                      | 2 +-
 clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index c0b946a98d1dc..8a24dd884a76a 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -688,7 +688,7 @@ static void InsertOCLBuiltinDeclarations(Sema &S, LookupResult &LR,
                                          unsigned Len) {
 
   for (unsigned i = 0; i < Len; ++i) {
-    OpenCLBuiltinDecl &Decl = OpenCLBuiltins[Index - 1 + i];
+    const OpenCLBuiltinDecl &Decl = OpenCLBuiltins[Index - 1 + i];
     ASTContext &Context = S.Context;
 
     // Ignore this BIF if the version is incorrect.
diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
index 1e495039c494c..8d83b1c7fa6b9 100644
--- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
+++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp
@@ -207,7 +207,7 @@ void BuiltinNameEmitter::GetOverloads() {
 }
 
 void BuiltinNameEmitter::EmitSignatureTable() {
-  OS << "OpenCLType OpenCLSignature[] = {\n";
+  OS << "static const OpenCLType OpenCLSignature[] = {\n";
   for (auto &P : SignatureSet) {
     OS << "// " << P.second << "\n";
     for (Record *R : P.first) {
@@ -222,7 +222,7 @@ void BuiltinNameEmitter::EmitSignatureTable() {
 }
 
 void BuiltinNameEmitter::EmitBuiltinTable() {
-  OS << "OpenCLBuiltinDecl OpenCLBuiltins[] = {\n";
+  OS << "static const OpenCLBuiltinDecl OpenCLBuiltins[] = {\n";
   for (auto &i : OverloadInfo) {
     StringRef Name = i.first;
     OS << "// " << Name << "\n";
@@ -255,7 +255,7 @@ void BuiltinNameEmitter::EmitStringMatcher() {
   OS << R"(
 // Return 0 if name is not a recognized OpenCL builtin, or an index
 // into a table of declarations if it is an OpenCL builtin.
-std::pair<unsigned, unsigned> isOpenCLBuiltin(llvm::StringRef name) {
+static std::pair<unsigned, unsigned> isOpenCLBuiltin(llvm::StringRef name) {
 
 )";
 

From c9e3c8301446f20efef6721dd3a05f2f9da217d8 Mon Sep 17 00:00:00 2001
From: Shoaib Meenai <smeenai@fb.com>
Date: Mon, 15 Jul 2019 22:44:08 +0000
Subject: [PATCH 176/451] Revert [llvm-lipo] Implement -create (with hardcoded
 alignments)

This reverts r366142 (git commit 67cee1dc7ee285b03372eb818a3894d35efa7394)

The test is failing on the Windows buildbots. Reverting while I
investigate.

llvm-svn: 366144
---
 .../tools/llvm-lipo/Inputs/arm64-slice.yaml   | 101 --------
 .../tools/llvm-lipo/Inputs/armv7-slice.yaml   |  76 ------
 .../tools/llvm-lipo/Inputs/x86_64-slice.yaml  |  89 -------
 .../tools/llvm-lipo/create-executable.test    |  11 -
 .../tools/llvm-lipo/create-invalid-input.test |   8 -
 .../llvm-lipo/create-without-alignment.test   |  32 ---
 .../thin-executable-universal-binary.test     |   2 +-
 llvm/tools/llvm-lipo/LipoOpts.td              |   5 -
 llvm/tools/llvm-lipo/llvm-lipo.cpp            | 222 ++----------------
 9 files changed, 19 insertions(+), 527 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml
 delete mode 100644 llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml
 delete mode 100644 llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml
 delete mode 100644 llvm/test/tools/llvm-lipo/create-executable.test
 delete mode 100644 llvm/test/tools/llvm-lipo/create-invalid-input.test
 delete mode 100644 llvm/test/tools/llvm-lipo/create-without-alignment.test

diff --git a/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml
deleted file mode 100644
index 5dfd45027381e..0000000000000
--- a/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml
+++ /dev/null
@@ -1,101 +0,0 @@
---- !mach-o
-FileHeader:      
-  magic:           0xFEEDFACF
-  cputype:         0x0100000C
-  cpusubtype:      0x00000000
-  filetype:        0x00000001
-  ncmds:           4
-  sizeofcmds:      352
-  flags:           0x00002000
-  reserved:        0x00000000
-LoadCommands:    
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         232
-    segname:         ''
-    vmaddr:          0
-    vmsize:          56
-    fileoff:         384
-    filesize:        56
-    maxprot:         7
-    initprot:        7
-    nsects:          2
-    flags:           0
-    Sections:        
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x0000000000000000
-        size:            20
-        offset:          0x00000180
-        align:           2
-        reloff:          0x00000000
-        nreloc:          0
-        flags:           0x80000400
-        reserved1:       0x00000000
-        reserved2:       0x00000000
-        reserved3:       0x00000000
-      - sectname:        __compact_unwind
-        segname:         __LD
-        addr:            0x0000000000000018
-        size:            32
-        offset:          0x00000198
-        align:           3
-        reloff:          0x000001B8
-        nreloc:          1
-        flags:           0x02000000
-        reserved1:       0x00000000
-        reserved2:       0x00000000
-        reserved3:       0x00000000
-  - cmd:             LC_VERSION_MIN_IPHONEOS
-    cmdsize:         16
-    version:         327680
-    sdk:             0
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          448
-    nsyms:           3
-    stroff:          496
-    strsize:         20
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       2
-    iextdefsym:      2
-    nextdefsym:      1
-    iundefsym:       3
-    nundefsym:       0
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  0
-    nindirectsyms:   0
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-LinkEditData:    
-  NameList:        
-    - n_strx:          13
-      n_type:          0x0E
-      n_sect:          1
-      n_desc:          0
-      n_value:         0
-    - n_strx:          7
-      n_type:          0x0E
-      n_sect:          2
-      n_desc:          0
-      n_value:         24
-    - n_strx:          1
-      n_type:          0x0F
-      n_sect:          1
-      n_desc:          0
-      n_value:         0
-  StringTable:     
-    - ''
-    - _main
-    - ltmp1
-    - ltmp0
-    - ''
-...
diff --git a/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml
deleted file mode 100644
index b26062931458c..0000000000000
--- a/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
---- !mach-o
-FileHeader:      
-  magic:           0xFEEDFACE
-  cputype:         0x0000000C
-  cpusubtype:      0x00000009
-  filetype:        0x00000001
-  ncmds:           4
-  sizeofcmds:      244
-  flags:           0x00002000
-LoadCommands:    
-  - cmd:             LC_SEGMENT
-    cmdsize:         124
-    segname:         ''
-    vmaddr:          0
-    vmsize:          10
-    fileoff:         272
-    filesize:        10
-    maxprot:         7
-    initprot:        7
-    nsects:          1
-    flags:           0
-    Sections:        
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x0000000000000000
-        size:            10
-        offset:          0x00000110
-        align:           1
-        reloff:          0x00000000
-        nreloc:          0
-        flags:           0x80000400
-        reserved1:       0x00000000
-        reserved2:       0x00000000
-        reserved3:       0x00000000
-  - cmd:             LC_VERSION_MIN_IPHONEOS
-    cmdsize:         16
-    version:         327680
-    sdk:             0
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          284
-    nsyms:           1
-    stroff:          296
-    strsize:         8
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       0
-    iextdefsym:      0
-    nextdefsym:      1
-    iundefsym:       1
-    nundefsym:       0
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  0
-    nindirectsyms:   0
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-LinkEditData:    
-  NameList:        
-    - n_strx:          1
-      n_type:          0x0F
-      n_sect:          1
-      n_desc:          8
-      n_value:         0
-  StringTable:     
-    - ''
-    - _main
-    - ''
-...
diff --git a/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml
deleted file mode 100644
index 27db6d7a13157..0000000000000
--- a/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml
+++ /dev/null
@@ -1,89 +0,0 @@
---- !mach-o
-FileHeader:      
-  magic:           0xFEEDFACF
-  cputype:         0x01000007
-  cpusubtype:      0x00000003
-  filetype:        0x00000001
-  ncmds:           4
-  sizeofcmds:      352
-  flags:           0x00002000
-  reserved:        0x00000000
-LoadCommands:    
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         232
-    segname:         ''
-    vmaddr:          0
-    vmsize:          80
-    fileoff:         384
-    filesize:        80
-    maxprot:         7
-    initprot:        7
-    nsects:          2
-    flags:           0
-    Sections:        
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x0000000000000000
-        size:            15
-        offset:          0x00000180
-        align:           4
-        reloff:          0x00000000
-        nreloc:          0
-        flags:           0x80000400
-        reserved1:       0x00000000
-        reserved2:       0x00000000
-        reserved3:       0x00000000
-      - sectname:        __eh_frame
-        segname:         __TEXT
-        addr:            0x0000000000000010
-        size:            64
-        offset:          0x00000190
-        align:           3
-        reloff:          0x00000000
-        nreloc:          0
-        flags:           0x6800000B
-        reserved1:       0x00000000
-        reserved2:       0x00000000
-        reserved3:       0x00000000
-  - cmd:             LC_VERSION_MIN_MACOSX
-    cmdsize:         16
-    version:         656384
-    sdk:             0
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          464
-    nsyms:           1
-    stroff:          480
-    strsize:         8
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       0
-    iextdefsym:      0
-    nextdefsym:      1
-    iundefsym:       1
-    nundefsym:       0
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  0
-    nindirectsyms:   0
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-LinkEditData:    
-  NameList:        
-    - n_strx:          1
-      n_type:          0x0F
-      n_sect:          1
-      n_desc:          0
-      n_value:         0
-  StringTable:     
-    - ''
-    - _main
-    - ''
-...
diff --git a/llvm/test/tools/llvm-lipo/create-executable.test b/llvm/test/tools/llvm-lipo/create-executable.test
deleted file mode 100644
index 82aa69cfec521..0000000000000
--- a/llvm/test/tools/llvm-lipo/create-executable.test
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-i386.o
-# RUN: yaml2obj %p/Inputs/x86_64-slice.yaml > %t-x86_64.o
-
-# RUN: chmod -x %t-i386.o
-# RUN: chmod -x %t-x86_64.o
-# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal.o
-# RUN: ! test -x %t-universal.o
-
-# RUN: chmod +x %t-i386.o
-# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal.o
-# RUN: test -x %t-universal.o
diff --git a/llvm/test/tools/llvm-lipo/create-invalid-input.test b/llvm/test/tools/llvm-lipo/create-invalid-input.test
deleted file mode 100644
index 4bb2e1a566679..0000000000000
--- a/llvm/test/tools/llvm-lipo/create-invalid-input.test
+++ /dev/null
@@ -1,8 +0,0 @@
-# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-32.o
-# RUN: yaml2obj %p/Inputs/i386-x86_64-universal.yaml > %t-universal.o
-
-# RUN: not llvm-lipo %t-32.o -create 2>&1 | FileCheck --check-prefix=NO_OUTPUT %s
-# NO_OUTPUT: error: create expects a single output file to be specified
-
-# RUN: not llvm-lipo %t-universal.o %t-32.o -create -output %t.o 2>&1 | FileCheck --check-prefix=DUPLICATE_ARCHS %s
-# DUPLICATE_ARCHS: have the same architecture i386 and therefore cannot be in the same universal binary
diff --git a/llvm/test/tools/llvm-lipo/create-without-alignment.test b/llvm/test/tools/llvm-lipo/create-without-alignment.test
deleted file mode 100644
index 813230a7e3c4a..0000000000000
--- a/llvm/test/tools/llvm-lipo/create-without-alignment.test
+++ /dev/null
@@ -1,32 +0,0 @@
-# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-i386.o
-# RUN: yaml2obj %p/Inputs/x86_64-slice.yaml > %t-x86_64.o
-
-# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal-llvm.o
-
-# RUN: yaml2obj %p/Inputs/i386-x86_64-universal.yaml > %t-universal.o
-# RUN: cmp %t-universal-llvm.o %t-universal.o
-
-# RUN: yaml2obj %p/Inputs/armv7-slice.yaml > %t-armv7.o
-# RUN: yaml2obj %p/Inputs/arm64-slice.yaml > %t-arm64.o
-
-# RUN: llvm-lipo %t-arm64.o %t-armv7.o %t-universal.o -create -output %t-universal-2.o
-# RUN: llvm-lipo %t-universal-2.o -thin x86_64 -output %t-x86_64_extracted.o
-# RUN: cmp %t-x86_64_extracted.o %t-x86_64.o
-# RUN: llvm-lipo %t-universal-2.o -thin armv7 -output %t-armv7-extracted.o
-# RUN: cmp %t-armv7-extracted.o %t-armv7.o
-
-# RUN: llvm-objdump %t-universal-2.o -m --universal-headers | FileCheck %s
-# CHECK: fat_magic FAT_MAGIC
-# CHECK: nfat_arch 4
-# CHECK: architecture i386
-# CHECK: offset 4096
-# CHECK: align 2^12 (4096)
-# CHECK: architecture x86_64
-# CHECK: offset 8192
-# CHECK: align 2^12 (4096)
-# CHECK: architecture armv7
-# CHECK: offset 16384
-# CHECK: align 2^14 (16384)
-# CHECK: architecture arm64
-# CHECK: offset 32768
-# CHECK: align 2^14 (16384)
diff --git a/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test b/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test
index 870252cd690ff..3992373d9e46f 100644
--- a/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test
+++ b/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test
@@ -4,7 +4,7 @@
 
 # RUN: chmod -x %t-universal.o
 # RUN: llvm-lipo %t-universal.o -thin i386 -output %t32.o
-# RUN: ! test -x %t32.o
+# RUN: test ! -x %t32.o
 
 # RUN: chmod +x %t-universal.o
 # RUN: llvm-lipo %t-universal.o -thin i386 -output %t32-ex.o
diff --git a/llvm/tools/llvm-lipo/LipoOpts.td b/llvm/tools/llvm-lipo/LipoOpts.td
index e2a73768733eb..e3cbe2dfa8e45 100644
--- a/llvm/tools/llvm-lipo/LipoOpts.td
+++ b/llvm/tools/llvm-lipo/LipoOpts.td
@@ -23,11 +23,6 @@ def thin : Option<["-", "--"], "thin", KIND_SEPARATE>,
            HelpText<"Create a thin output file of specified arch_type from the "
                     "fat input file. Requires -output option">;
 
-def create : Option<["-", "--"], "create", KIND_FLAG>,
-             Group<action_group>,
-             HelpText<"Create a universal binary output file from the input "
-                      "files. Requires -output option">;
-
 def output : Option<["-", "--"], "output", KIND_SEPARATE>,
              HelpText<"Create output file with specified name">;
 def o : JoinedOrSeparate<["-"], "o">, Alias<output>;
diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp
index 65135bec951c3..ea0d427e01997 100644
--- a/llvm/tools/llvm-lipo/llvm-lipo.cpp
+++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp
@@ -80,7 +80,6 @@ enum class LipoAction {
   PrintArchs,
   VerifyArch,
   ThinArch,
-  CreateUniversal,
 };
 
 struct Config {
@@ -91,14 +90,6 @@ struct Config {
   LipoAction ActionToPerform;
 };
 
-struct Slice {
-  const MachOObjectFile *ObjectFile;
-  // Requires Alignment field to store slice alignment values from universal
-  // binaries. Also needed to order the slices using compareSlices, so the total
-  // file size can be calculated before creating the output buffer.
-  uint32_t Alignment;
-};
-
 } // end namespace
 
 static void validateArchitectureName(StringRef ArchitectureName) {
@@ -117,7 +108,7 @@ static Config parseLipoOptions(ArrayRef<const char *> ArgsArr) {
   Config C;
   LipoOptTable T;
   unsigned MissingArgumentIndex, MissingArgumentCount;
-  opt::InputArgList InputArgs =
+  llvm::opt::InputArgList InputArgs =
       T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount);
 
   if (MissingArgumentCount)
@@ -195,12 +186,6 @@ static Config parseLipoOptions(ArrayRef<const char *> ArgsArr) {
     C.ActionToPerform = LipoAction::ThinArch;
     return C;
 
-  case LIPO_create:
-    if (C.OutputFile.empty())
-      reportError("create expects a single output file to be specified");
-    C.ActionToPerform = LipoAction::CreateUniversal;
-    return C;
-
   default:
     reportError("llvm-lipo action unspecified");
   }
@@ -210,7 +195,8 @@ static SmallVector<OwningBinary<Binary>, 1>
 readInputBinaries(ArrayRef<std::string> InputFiles) {
   SmallVector<OwningBinary<Binary>, 1> InputBinaries;
   for (StringRef InputFile : InputFiles) {
-    Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(InputFile);
+    Expected<OwningBinary<llvm::object::Binary>> BinaryOrErr =
+        createBinary(InputFile);
     if (!BinaryOrErr)
       reportError(InputFile, BinaryOrErr.takeError());
     // TODO: Add compatibility for archive files
@@ -255,35 +241,33 @@ static void verifyArch(ArrayRef<OwningBinary<Binary>> InputBinaries,
   exit(EXIT_SUCCESS);
 }
 
-// Returns a string of the given Object file's architecture type
-// Unknown architectures formatted unknown(CPUType,CPUSubType) for compatibility
-// with cctools lipo
-static std::string getArchString(const MachOObjectFile &ObjectFile) {
-  const Triple T = ObjectFile.getArchTriple();
-  const StringRef ObjectArch = T.getArchName();
-  if (!ObjectArch.empty())
-    return ObjectArch;
-  return ("unknown(" + Twine(ObjectFile.getHeader().cputype) + "," +
-          Twine(ObjectFile.getHeader().cpusubtype & ~MachO::CPU_SUBTYPE_MASK) +
-          ")")
-      .str();
+static void printArchOrUnknown(const MachOObjectFile *ObjectFile) {
+  // Prints trailing space and unknown in this format for compatibility with
+  // cctools lipo.
+  const std::string ObjectArch = ObjectFile->getArchTriple().getArchName();
+  if (ObjectArch.empty())
+    outs() << "unknown(" << ObjectFile->getHeader().cputype << ","
+           << ObjectFile->getHeader().cpusubtype << ") ";
+  else
+    outs() << ObjectArch + " ";
 }
 
 LLVM_ATTRIBUTE_NORETURN
 static void printArchs(ArrayRef<OwningBinary<Binary>> InputBinaries) {
-  // Prints trailing space for compatibility with cctools lipo.
   assert(InputBinaries.size() == 1 && "Incorrect number of input binaries");
   const Binary *InputBinary = InputBinaries.front().getBinary();
   if (auto UO = dyn_cast<MachOUniversalBinary>(InputBinary)) {
-    for (const auto &O : UO->objects()) {
+    for (MachOUniversalBinary::object_iterator I = UO->begin_objects(),
+                                               E = UO->end_objects();
+         I != E; ++I) {
       Expected<std::unique_ptr<MachOObjectFile>> BinaryOrError =
-          O.getAsObjectFile();
+          I->getAsObjectFile();
       if (!BinaryOrError)
         reportError(InputBinary->getFileName(), BinaryOrError.takeError());
-      outs() << getArchString(*BinaryOrError.get().get()) << " ";
+      printArchOrUnknown(BinaryOrError.get().get());
     }
   } else if (auto O = dyn_cast<MachOObjectFile>(InputBinary)) {
-    outs() << getArchString(*O) << " ";
+    printArchOrUnknown(O);
   } else {
     llvm_unreachable("Unexpected binary format");
   }
@@ -330,173 +314,6 @@ static void extractSlice(ArrayRef<OwningBinary<Binary>> InputBinaries,
   exit(EXIT_SUCCESS);
 }
 
-static void checkArchDuplicates(const ArrayRef<Slice> &Slices) {
-  DenseMap<uint64_t, const MachOObjectFile *> CPUIds;
-  auto CPUIDForSlice = [](const Slice &S) {
-    return static_cast<uint64_t>(S.ObjectFile->getHeader().cputype) << 32 |
-           S.ObjectFile->getHeader().cpusubtype;
-  };
-  for (const auto &S : Slices) {
-    auto Entry = CPUIds.try_emplace(CPUIDForSlice(S), S.ObjectFile);
-    if (!Entry.second)
-      reportError(Entry.first->second->getFileName() + " and " +
-                  S.ObjectFile->getFileName() + " have the same architecture " +
-                  getArchString(*S.ObjectFile) +
-                  " and therefore cannot be in the same universal binary");
-  }
-}
-
-static uint32_t calculateAlignment(const MachOObjectFile *ObjectFile) {
-  // TODO: Implement getAlign() and remove hard coding
-  // Will be implemented in a follow-up.
-
-  switch (ObjectFile->getHeader().cputype) {
-  case MachO::CPU_TYPE_I386:
-  case MachO::CPU_TYPE_X86_64:
-  case MachO::CPU_TYPE_POWERPC:
-  case MachO::CPU_TYPE_POWERPC64:
-    return 12; // log2 value of page size(4k) for x86 and PPC
-  case MachO::CPU_TYPE_ARM:
-  case MachO::CPU_TYPE_ARM64:
-  case MachO::CPU_TYPE_ARM64_32:
-    return 14; // log2 value of page size(16k) for Darwin ARM
-  default:
-    return 12;
-  }
-}
-
-// This function replicates ordering from cctools lipo for consistency
-static bool compareSlices(const Slice &Lhs, const Slice &Rhs) {
-  if (Lhs.ObjectFile->getHeader().cputype ==
-      Rhs.ObjectFile->getHeader().cputype)
-    return Lhs.ObjectFile->getHeader().cpusubtype <
-           Rhs.ObjectFile->getHeader().cpusubtype;
-
-  // force arm64-family to follow after all other slices for compatibility
-  // with cctools lipo
-  if (Lhs.ObjectFile->getHeader().cputype == MachO::CPU_TYPE_ARM64)
-    return false;
-  if (Rhs.ObjectFile->getHeader().cputype == MachO::CPU_TYPE_ARM64)
-    return true;
-
-  // Sort by alignment to minimize file size
-  return Lhs.Alignment < Rhs.Alignment;
-}
-
-// Updates vector ExtractedObjects with the MachOObjectFiles extracted from
-// Universal Binary files to transfer ownership.
-static SmallVector<Slice, 2> buildSlices(
-    ArrayRef<OwningBinary<Binary>> InputBinaries,
-    SmallVectorImpl<std::unique_ptr<MachOObjectFile>> &ExtractedObjects) {
-  SmallVector<Slice, 2> Slices;
-  for (auto &IB : InputBinaries) {
-    const Binary *InputBinary = IB.getBinary();
-    if (auto UO = dyn_cast<MachOUniversalBinary>(InputBinary)) {
-      for (const auto &O : UO->objects()) {
-        Expected<std::unique_ptr<MachOObjectFile>> BinaryOrError =
-            O.getAsObjectFile();
-        if (!BinaryOrError)
-          reportError(InputBinary->getFileName(), BinaryOrError.takeError());
-        ExtractedObjects.push_back(std::move(BinaryOrError.get()));
-        Slices.push_back(Slice{ExtractedObjects.back().get(), O.getAlign()});
-      }
-    } else if (auto O = dyn_cast<MachOObjectFile>(InputBinary)) {
-      Slices.push_back(Slice{O, calculateAlignment(O)});
-    } else {
-      llvm_unreachable("Unexpected binary format");
-    }
-  }
-  return Slices;
-}
-
-static SmallVector<MachO::fat_arch, 2>
-buildFatArchList(ArrayRef<Slice> Slices) {
-  SmallVector<MachO::fat_arch, 2> FatArchList;
-  uint64_t Offset =
-      sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch);
-
-  for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) {
-    Offset = alignTo(Offset, 1 << Slices[Index].Alignment);
-    const MachOObjectFile *ObjectFile = Slices[Index].ObjectFile;
-    if (Offset > UINT32_MAX)
-      reportError("fat file too large to be created because the offset "
-                  "field in struct fat_arch is only 32-bits and the offset " +
-                  Twine(Offset) + " for " + ObjectFile->getFileName() +
-                  " for architecture " + getArchString(*ObjectFile) +
-                  "exceeds that.");
-
-    MachO::fat_arch FatArch;
-    FatArch.cputype = ObjectFile->getHeader().cputype;
-    FatArch.cpusubtype = ObjectFile->getHeader().cpusubtype;
-    FatArch.offset = Offset;
-    FatArch.size = ObjectFile->getMemoryBufferRef().getBufferSize();
-    FatArch.align = Slices[Index].Alignment;
-    Offset += FatArch.size;
-    FatArchList.push_back(FatArch);
-  }
-  return FatArchList;
-}
-
-static void createUniversalBinary(SmallVectorImpl<Slice> &Slices,
-                                  StringRef OutputFileName) {
-  MachO::fat_header FatHeader;
-  FatHeader.magic = MachO::FAT_MAGIC;
-  FatHeader.nfat_arch = Slices.size();
-
-  stable_sort(Slices, compareSlices);
-  SmallVector<MachO::fat_arch, 2> FatArchList = buildFatArchList(Slices);
-
-  const bool IsExecutable = any_of(Slices, [](Slice S) {
-    return sys::fs::can_execute(S.ObjectFile->getFileName());
-  });
-  const uint64_t OutputFileSize =
-      FatArchList.back().offset + FatArchList.back().size;
-  Expected<std::unique_ptr<FileOutputBuffer>> OutFileOrError =
-      FileOutputBuffer::create(OutputFileName, OutputFileSize,
-                               IsExecutable ? FileOutputBuffer::F_executable
-                                            : 0);
-  if (!OutFileOrError)
-    reportError(OutputFileName, OutFileOrError.takeError());
-  std::unique_ptr<FileOutputBuffer> OutFile = std::move(OutFileOrError.get());
-  std::memset(OutFile->getBufferStart(), 0, OutputFileSize);
-
-  if (sys::IsLittleEndianHost)
-    MachO::swapStruct(FatHeader);
-  std::memcpy(OutFile->getBufferStart(), &FatHeader, sizeof(MachO::fat_header));
-
-  for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) {
-    MemoryBufferRef BufferRef = Slices[Index].ObjectFile->getMemoryBufferRef();
-    std::copy(BufferRef.getBufferStart(), BufferRef.getBufferEnd(),
-              OutFile->getBufferStart() + FatArchList[Index].offset);
-  }
-
-  // FatArchs written after Slices in order reduce the number of swaps for the
-  // LittleEndian case
-  if (sys::IsLittleEndianHost)
-    for (MachO::fat_arch &FA : FatArchList)
-      MachO::swapStruct(FA);
-  std::memcpy(OutFile->getBufferStart() + sizeof(MachO::fat_header),
-              FatArchList.begin(),
-              sizeof(MachO::fat_arch) * FatArchList.size());
-
-  if (Error E = OutFile->commit())
-    reportError(OutputFileName, std::move(E));
-}
-
-LLVM_ATTRIBUTE_NORETURN
-static void createUniversalBinary(ArrayRef<OwningBinary<Binary>> InputBinaries,
-                                  StringRef OutputFileName) {
-  assert(InputBinaries.size() >= 1 && "Incorrect number of input binaries");
-  assert(!OutputFileName.empty() && "Create expects a single output file");
-
-  SmallVector<std::unique_ptr<MachOObjectFile>, 1> ExtractedObjects;
-  SmallVector<Slice, 1> Slices = buildSlices(InputBinaries, ExtractedObjects);
-  checkArchDuplicates(Slices);
-  createUniversalBinary(Slices, OutputFileName);
-
-  exit(EXIT_SUCCESS);
-}
-
 int main(int argc, char **argv) {
   InitLLVM X(argc, argv);
   Config C = parseLipoOptions(makeArrayRef(argv + 1, argc));
@@ -513,9 +330,6 @@ int main(int argc, char **argv) {
   case LipoAction::ThinArch:
     extractSlice(InputBinaries, C.ThinArchType, C.OutputFile);
     break;
-  case LipoAction::CreateUniversal:
-    createUniversalBinary(InputBinaries, C.OutputFile);
-    break;
   }
   return EXIT_SUCCESS;
 }

From 9f96a58cccb63110ca9515644c454620c86c566d Mon Sep 17 00:00:00 2001
From: Heejin Ahn <aheejin@gmail.com>
Date: Mon, 15 Jul 2019 22:49:25 +0000
Subject: [PATCH 177/451] [WebAssembly] Rename except_ref type to exnref

Summary:
We agreed to rename `except_ref` to `exnref` for consistency with other
reference types in
https://github.com/WebAssembly/exception-handling/issues/79. This also
renames WebAssemblyInstrExceptRef.td to WebAssemblyInstrRef.td in order
to use the file for other reference types in future.

Reviewers: dschuff

Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64703

llvm-svn: 366145
---
 lld/wasm/WriterUtils.cpp                      |  4 +--
 llvm/include/llvm/BinaryFormat/Wasm.h         |  4 +--
 llvm/include/llvm/CodeGen/ValueTypes.td       |  2 +-
 llvm/include/llvm/Support/MachineValueType.h  |  4 +--
 llvm/lib/CodeGen/ValueTypes.cpp               |  2 +-
 .../AsmParser/WebAssemblyAsmParser.cpp        |  6 ++---
 .../MCTargetDesc/WebAssemblyInstPrinter.cpp   |  4 +--
 .../MCTargetDesc/WebAssemblyMCTargetDesc.cpp  |  4 +--
 .../MCTargetDesc/WebAssemblyMCTargetDesc.h    | 22 +++++++--------
 .../WebAssembly/WebAssemblyCFGStackify.cpp    | 21 +++++++--------
 .../WebAssemblyCallIndirectFixup.cpp          |  4 +--
 .../WebAssembly/WebAssemblyExplicitLocals.cpp | 20 +++++++-------
 .../WebAssembly/WebAssemblyFastISel.cpp       | 26 +++++++++---------
 .../WebAssembly/WebAssemblyInstrCall.td       | 12 ++++-----
 .../WebAssembly/WebAssemblyInstrControl.td    | 14 +++++-----
 .../WebAssembly/WebAssemblyInstrExceptRef.td  | 26 ------------------
 .../WebAssembly/WebAssemblyInstrInfo.cpp      |  4 +--
 .../WebAssembly/WebAssemblyInstrInfo.td       |  6 ++---
 .../Target/WebAssembly/WebAssemblyInstrRef.td | 25 +++++++++++++++++
 .../WebAssembly/WebAssemblyLateEHPrepare.cpp  | 27 +++++++++----------
 .../WebAssembly/WebAssemblyRegStackify.cpp    |  6 ++---
 .../WebAssembly/WebAssemblyRegisterInfo.td    |  4 +--
 llvm/test/CodeGen/WebAssembly/exception.ll    | 12 ++++-----
 .../test/MC/Disassembler/WebAssembly/wasm.txt |  2 +-
 llvm/test/MC/WebAssembly/basic-assembly.s     |  4 +--
 llvm/test/MC/WebAssembly/objdump.s            |  4 +--
 .../WebAssemblyExceptionInfoTest.cpp          | 16 +++++------
 llvm/utils/TableGen/CodeGenTarget.cpp         |  2 +-
 28 files changed, 141 insertions(+), 146 deletions(-)
 delete mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
 create mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td

diff --git a/lld/wasm/WriterUtils.cpp b/lld/wasm/WriterUtils.cpp
index d45f6a4c77f6b..913723c4b6703 100644
--- a/lld/wasm/WriterUtils.cpp
+++ b/lld/wasm/WriterUtils.cpp
@@ -182,8 +182,8 @@ std::string lld::toString(ValType type) {
     return "f64";
   case ValType::V128:
     return "v128";
-  case ValType::EXCEPT_REF:
-    return "except_ref";
+  case ValType::EXNREF:
+    return "exnref";
   }
   llvm_unreachable("Invalid wasm::ValType");
 }
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index 0ff52cbdb3375..4f6c24bbc68df 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -224,7 +224,7 @@ enum : unsigned {
   WASM_TYPE_F64 = 0x7C,
   WASM_TYPE_V128 = 0x7B,
   WASM_TYPE_FUNCREF = 0x70,
-  WASM_TYPE_EXCEPT_REF = 0x68,
+  WASM_TYPE_EXNREF = 0x68,
   WASM_TYPE_FUNC = 0x60,
   WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
 };
@@ -332,7 +332,7 @@ enum class ValType {
   F32 = WASM_TYPE_F32,
   F64 = WASM_TYPE_F64,
   V128 = WASM_TYPE_V128,
-  EXCEPT_REF = WASM_TYPE_EXCEPT_REF,
+  EXNREF = WASM_TYPE_EXNREF,
 };
 
 struct WasmSignature {
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index feea7e5efe007..5818ac183fcc4 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -160,7 +160,7 @@ def x86mmx : ValueType<64 , 125>;   // X86 MMX value
 def FlagVT : ValueType<0  , 126>;   // Pre-RA sched glue
 def isVoid : ValueType<0  , 127>;   // Produces no value
 def untyped: ValueType<8  , 128>;   // Produces an untyped value
-def ExceptRef: ValueType<0, 129>;   // WebAssembly's except_ref type
+def exnref: ValueType<0, 129>;      // WebAssembly's exnref type
 def token  : ValueType<0  , 248>;   // TokenTy
 def MetadataVT: ValueType<0, 249>;  // Metadata
 
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index a9b130f436502..b94d2c4836cc2 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -206,7 +206,7 @@ namespace llvm {
                                // unspecified type.  The register class
                                // will be determined by the opcode.
 
-      ExceptRef      = 129,    // WebAssembly's except_ref type
+      exnref         =  129,   // WebAssembly's exnref type
 
       FIRST_VALUETYPE = 1,     // This is always the beginning of the list.
       LAST_VALUETYPE =  130,   // This always remains at the end of the list.
@@ -811,7 +811,7 @@ namespace llvm {
       case v1024f32:  return 32768;
       case v2048i32:
       case v2048f32:  return 65536;
-      case ExceptRef: return 0; // opaque type
+      case exnref: return 0; // opaque type
       }
     }
 
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index ac08877eb6648..a911cdcbec9db 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -207,7 +207,7 @@ std::string EVT::getEVTString() const {
   case MVT::v8f64:   return "v8f64";
   case MVT::Metadata:return "Metadata";
   case MVT::Untyped: return "Untyped";
-  case MVT::ExceptRef: return "ExceptRef";
+  case MVT::exnref : return "exnref";
   }
 }
 
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index e9a7f6977c2d3..09628e872dd56 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -308,8 +308,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
         Type == "i32x4" || Type == "i64x2" || Type == "f32x4" ||
         Type == "f64x2")
       return wasm::ValType::V128;
-    if (Type == "except_ref")
-      return wasm::ValType::EXCEPT_REF;
+    if (Type == "exnref")
+      return wasm::ValType::EXNREF;
     return Optional<wasm::ValType>();
   }
 
@@ -320,7 +320,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
         .Case("f32", WebAssembly::ExprType::F32)
         .Case("f64", WebAssembly::ExprType::F64)
         .Case("v128", WebAssembly::ExprType::V128)
-        .Case("except_ref", WebAssembly::ExprType::ExceptRef)
+        .Case("exnref", WebAssembly::ExprType::Exnref)
         .Case("void", WebAssembly::ExprType::Void)
         .Default(WebAssembly::ExprType::Invalid);
   }
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index cfa808b750add..a439b724d9674 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -282,8 +282,8 @@ const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) {
     return "funcref";
   case wasm::WASM_TYPE_FUNC:
     return "func";
-  case wasm::WASM_TYPE_EXCEPT_REF:
-    return "except_ref";
+  case wasm::WASM_TYPE_EXNREF:
+    return "exnref";
   case wasm::WASM_TYPE_NORESULT:
     return "void";
   default:
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 21545c39164e9..9c8ca1f13b184 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -146,8 +146,8 @@ wasm::ValType WebAssembly::toValType(const MVT &Ty) {
   case MVT::v4f32:
   case MVT::v2f64:
     return wasm::ValType::V128;
-  case MVT::ExceptRef:
-    return wasm::ValType::EXCEPT_REF;
+  case MVT::exnref:
+    return wasm::ValType::EXNREF;
   default:
     llvm_unreachable("unexpected type");
   }
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index a0d526b8a2e03..31ad88b3549c7 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -130,7 +130,7 @@ enum class ExprType : unsigned {
   F32 = 0x7D,
   F64 = 0x7C,
   V128 = 0x7B,
-  ExceptRef = 0x68,
+  Exnref = 0x68,
   Invalid = 0x00
 };
 
@@ -403,8 +403,8 @@ inline bool isCopy(unsigned Opc) {
   case WebAssembly::COPY_F64_S:
   case WebAssembly::COPY_V128:
   case WebAssembly::COPY_V128_S:
-  case WebAssembly::COPY_EXCEPT_REF:
-  case WebAssembly::COPY_EXCEPT_REF_S:
+  case WebAssembly::COPY_EXNREF:
+  case WebAssembly::COPY_EXNREF_S:
     return true;
   default:
     return false;
@@ -453,8 +453,8 @@ inline bool isCallDirect(unsigned Opc) {
   case WebAssembly::CALL_v4f32_S:
   case WebAssembly::CALL_v2f64:
   case WebAssembly::CALL_v2f64_S:
-  case WebAssembly::CALL_ExceptRef:
-  case WebAssembly::CALL_ExceptRef_S:
+  case WebAssembly::CALL_exnref:
+  case WebAssembly::CALL_exnref_S:
   case WebAssembly::RET_CALL:
   case WebAssembly::RET_CALL_S:
     return true;
@@ -487,8 +487,8 @@ inline bool isCallIndirect(unsigned Opc) {
   case WebAssembly::CALL_INDIRECT_v4f32_S:
   case WebAssembly::CALL_INDIRECT_v2f64:
   case WebAssembly::CALL_INDIRECT_v2f64_S:
-  case WebAssembly::CALL_INDIRECT_ExceptRef:
-  case WebAssembly::CALL_INDIRECT_ExceptRef_S:
+  case WebAssembly::CALL_INDIRECT_exnref:
+  case WebAssembly::CALL_INDIRECT_exnref_S:
   case WebAssembly::RET_CALL_INDIRECT:
   case WebAssembly::RET_CALL_INDIRECT_S:
     return true;
@@ -530,8 +530,8 @@ inline unsigned getCalleeOpNo(unsigned Opc) {
   case WebAssembly::CALL_v4f32_S:
   case WebAssembly::CALL_v2f64:
   case WebAssembly::CALL_v2f64_S:
-  case WebAssembly::CALL_ExceptRef:
-  case WebAssembly::CALL_ExceptRef_S:
+  case WebAssembly::CALL_exnref:
+  case WebAssembly::CALL_exnref_S:
   case WebAssembly::CALL_INDIRECT_i32:
   case WebAssembly::CALL_INDIRECT_i32_S:
   case WebAssembly::CALL_INDIRECT_i64:
@@ -552,8 +552,8 @@ inline unsigned getCalleeOpNo(unsigned Opc) {
   case WebAssembly::CALL_INDIRECT_v4f32_S:
   case WebAssembly::CALL_INDIRECT_v2f64:
   case WebAssembly::CALL_INDIRECT_v2f64_S:
-  case WebAssembly::CALL_INDIRECT_ExceptRef:
-  case WebAssembly::CALL_INDIRECT_ExceptRef_S:
+  case WebAssembly::CALL_INDIRECT_exnref:
+  case WebAssembly::CALL_INDIRECT_exnref_S:
     return 1;
   default:
     llvm_unreachable("Not a call instruction");
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index a23a47d2e89ad..e6bfc5226e2eb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -308,7 +308,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) {
 
   // Add the BLOCK.
 
-  // 'br_on_exn' extracts except_ref object and pushes variable number of values
+  // 'br_on_exn' extracts exnref object and pushes variable number of values
   // depending on its tag. For C++ exception, its a single i32 value, and the
   // generated code will be in the form of:
   // block i32
@@ -766,11 +766,11 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
   // Note that the new wrapping block/end_block will be generated later in
   // placeBlockMarker.
   //
-  // TODO Currently local.set and local.gets are generated to move except_ref
-  // value created by catches. That's because we don't support yielding values
-  // from a block in LLVM machine IR yet, even though it is supported by wasm.
-  // Delete unnecessary local.get/local.sets once yielding values from a block
-  // is supported. The full EH spec requires multi-value support to do this, but
+  // TODO Currently local.set and local.gets are generated to move exnref value
+  // created by catches. That's because we don't support yielding values from a
+  // block in LLVM machine IR yet, even though it is supported by wasm. Delete
+  // unnecessary local.get/local.sets once yielding values from a block is
+  // supported. The full EH spec requires multi-value support to do this, but
   // for C++ we don't yet need it because we only throw a single i32.
   //
   // ---
@@ -834,7 +834,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
   DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> UnwindDestToTryRanges;
   // In new CFG, <destination to branch to, a vector of try ranges>
   DenseMap<MachineBasicBlock *, SmallVector<TryRange, 4>> BrDestToTryRanges;
-  // In new CFG, <destination to branch to, register containing except_ref>
+  // In new CFG, <destination to branch to, register containing exnref>
   DenseMap<MachineBasicBlock *, unsigned> BrDestToExnReg;
 
   // Gather possibly throwing calls (i.e., previously invokes) whose current
@@ -936,8 +936,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) {
   // of the function with a local.get and a rethrow instruction.
   if (NeedAppendixBlock) {
     auto *AppendixBB = getAppendixBlock(MF);
-    unsigned ExnReg =
-        MRI.createVirtualRegister(&WebAssembly::EXCEPT_REFRegClass);
+    unsigned ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
     BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW))
         .addReg(ExnReg);
     // These instruction ranges should branch to this appendix BB.
@@ -1225,8 +1224,8 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) {
   case MVT::v2f64:
     RetType = WebAssembly::ExprType::V128;
     break;
-  case MVT::ExceptRef:
-    RetType = WebAssembly::ExprType::ExceptRef;
+  case MVT::exnref:
+    RetType = WebAssembly::ExprType::Exnref;
     break;
   default:
     llvm_unreachable("unexpected return type");
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index 313147c943141..2537e6042b1e3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -85,8 +85,8 @@ static unsigned getNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
     return CALL_INDIRECT_v4f32;
   case PCALL_INDIRECT_v2f64:
     return CALL_INDIRECT_v2f64;
-  case PCALL_INDIRECT_ExceptRef:
-    return CALL_INDIRECT_ExceptRef;
+  case PCALL_INDIRECT_exnref:
+    return CALL_INDIRECT_exnref;
   case PRET_CALL_INDIRECT:
     return RET_CALL_INDIRECT;
   default:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index de7e912129fb6..dbd62179f055f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -90,8 +90,8 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) {
     return WebAssembly::DROP_F64;
   if (RC == &WebAssembly::V128RegClass)
     return WebAssembly::DROP_V128;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return WebAssembly::DROP_EXCEPT_REF;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::DROP_EXNREF;
   llvm_unreachable("Unexpected register class");
 }
 
@@ -107,8 +107,8 @@ static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) {
     return WebAssembly::LOCAL_GET_F64;
   if (RC == &WebAssembly::V128RegClass)
     return WebAssembly::LOCAL_GET_V128;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return WebAssembly::LOCAL_GET_EXCEPT_REF;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::LOCAL_GET_EXNREF;
   llvm_unreachable("Unexpected register class");
 }
 
@@ -124,8 +124,8 @@ static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) {
     return WebAssembly::LOCAL_SET_F64;
   if (RC == &WebAssembly::V128RegClass)
     return WebAssembly::LOCAL_SET_V128;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return WebAssembly::LOCAL_SET_EXCEPT_REF;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::LOCAL_SET_EXNREF;
   llvm_unreachable("Unexpected register class");
 }
 
@@ -141,8 +141,8 @@ static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) {
     return WebAssembly::LOCAL_TEE_F64;
   if (RC == &WebAssembly::V128RegClass)
     return WebAssembly::LOCAL_TEE_V128;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return WebAssembly::LOCAL_TEE_EXCEPT_REF;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return WebAssembly::LOCAL_TEE_EXNREF;
   llvm_unreachable("Unexpected register class");
 }
 
@@ -158,8 +158,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) {
     return MVT::f64;
   if (RC == &WebAssembly::V128RegClass)
     return MVT::v16i8;
-  if (RC == &WebAssembly::EXCEPT_REFRegClass)
-    return MVT::ExceptRef;
+  if (RC == &WebAssembly::EXNREFRegClass)
+    return MVT::exnref;
   llvm_unreachable("unrecognized register class");
 }
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 4fff49f54d765..1a24f749b5644 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -129,7 +129,7 @@ class WebAssemblyFastISel final : public FastISel {
     case MVT::i64:
     case MVT::f32:
     case MVT::f64:
-    case MVT::ExceptRef:
+    case MVT::exnref:
       return VT;
     case MVT::f16:
       return MVT::f32;
@@ -698,9 +698,9 @@ bool WebAssemblyFastISel::fastLowerArguments() {
       Opc = WebAssembly::ARGUMENT_v2f64;
       RC = &WebAssembly::V128RegClass;
       break;
-    case MVT::ExceptRef:
-      Opc = WebAssembly::ARGUMENT_ExceptRef;
-      RC = &WebAssembly::EXCEPT_REFRegClass;
+    case MVT::exnref:
+      Opc = WebAssembly::ARGUMENT_exnref;
+      RC = &WebAssembly::EXNREFRegClass;
       break;
     default:
       return false;
@@ -815,10 +815,10 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
                      : WebAssembly::PCALL_INDIRECT_v2f64;
       ResultReg = createResultReg(&WebAssembly::V128RegClass);
       break;
-    case MVT::ExceptRef:
-      Opc = IsDirect ? WebAssembly::CALL_ExceptRef
-                     : WebAssembly::PCALL_INDIRECT_ExceptRef;
-      ResultReg = createResultReg(&WebAssembly::EXCEPT_REFRegClass);
+    case MVT::exnref:
+      Opc = IsDirect ? WebAssembly::CALL_exnref
+                     : WebAssembly::PCALL_INDIRECT_exnref;
+      ResultReg = createResultReg(&WebAssembly::EXNREFRegClass);
       break;
     default:
       return false;
@@ -921,9 +921,9 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
     Opc = WebAssembly::SELECT_F64;
     RC = &WebAssembly::F64RegClass;
     break;
-  case MVT::ExceptRef:
-    Opc = WebAssembly::SELECT_EXCEPT_REF;
-    RC = &WebAssembly::EXCEPT_REFRegClass;
+  case MVT::exnref:
+    Opc = WebAssembly::SELECT_EXNREF;
+    RC = &WebAssembly::EXNREFRegClass;
     break;
   default:
     return false;
@@ -1341,8 +1341,8 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
   case MVT::v2f64:
     Opc = WebAssembly::RETURN_v2f64;
     break;
-  case MVT::ExceptRef:
-    Opc = WebAssembly::RETURN_EXCEPT_REF;
+  case MVT::exnref:
+    Opc = WebAssembly::RETURN_EXNREF;
     break;
   default:
     return false;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index bcff9f23608bb..703c15d58c93a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -59,7 +59,7 @@ defm "" : CALL<i32, I32, "i32.">;
 defm "" : CALL<i64, I64, "i64.">;
 defm "" : CALL<f32, F32, "f32.">;
 defm "" : CALL<f64, F64, "f64.">;
-defm "" : CALL<ExceptRef, EXCEPT_REF, "except_ref.", [HasExceptionHandling]>;
+defm "" : CALL<exnref, EXNREF, "exnref.", [HasExceptionHandling]>;
 defm "" : CALL<v16i8, V128, "v128.", [HasSIMD128]>;
 defm "" : CALL<v8i16, V128, "v128.", [HasSIMD128]>;
 defm "" : CALL<v4i32, V128, "v128.", [HasSIMD128]>;
@@ -139,9 +139,8 @@ def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
           (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
 def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
           (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(ExceptRef
-           (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
-          (CALL_ExceptRef tglobaladdr:$callee)>,
+def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_exnref tglobaladdr:$callee)>,
       Requires<[HasExceptionHandling]>;
 def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
           (CALL_VOID tglobaladdr:$callee)>;
@@ -169,9 +168,8 @@ def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
           (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
 def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
           (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>;
-def : Pat<(ExceptRef
-           (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
-          (CALL_ExceptRef texternalsym:$callee)>,
+def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_exnref texternalsym:$callee)>,
       Requires<[HasExceptionHandling]>;
 def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
           (CALL_VOID texternalsym:$callee)>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 574cb09ff336e..1870c5bc34b06 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -114,7 +114,7 @@ let isReturn = 1 in {
   defm "": RETURN<I64>;
   defm "": RETURN<F32>;
   defm "": RETURN<F64>;
-  defm "": RETURN<EXCEPT_REF>;
+  defm "": RETURN<EXNREF>;
   defm "": SIMD_RETURN<v16i8>;
   defm "": SIMD_RETURN<v8i16>;
   defm "": SIMD_RETURN<v4i32>;
@@ -144,8 +144,8 @@ defm THROW : I<(outs), (ins event_op:$tag, variable_ops),
                (outs), (ins event_op:$tag),
                [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))],
                "throw   \t$tag", "throw   \t$tag", 0x08>;
-defm RETHROW : I<(outs), (ins EXCEPT_REF:$exn), (outs), (ins),
-                 [], "rethrow \t$exn", "rethrow", 0x09>;
+defm RETHROW : I<(outs), (ins EXNREF:$exn), (outs), (ins), [],
+                 "rethrow \t$exn", "rethrow", 0x09>;
 // Pseudo instruction to be the lowering target of int_wasm_rethrow_in_catch
 // intrinsic. Will be converted to the real rethrow instruction later.
 let isPseudo = 1 in
@@ -161,15 +161,15 @@ defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
 
 // Catching an exception: catch / extract_exception
 let hasCtrlDep = 1, hasSideEffects = 1 in
-defm CATCH : I<(outs EXCEPT_REF:$dst), (ins), (outs), (ins), [],
+defm CATCH : I<(outs EXNREF:$dst), (ins), (outs), (ins), [],
                "catch   \t$dst", "catch", 0x07>;
 
 // Querying / extracing exception: br_on_exn
-// br_on_exn queries an except_ref to see if it matches the corresponding
-// exception tag index. If true it branches to the given label and pushes the
+// br_on_exn queries an exnref to see if it matches the corresponding exception
+// tag index. If true it branches to the given label and pushes the
 // corresponding argument values of the exception onto the stack.
 let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in
-defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXCEPT_REF:$exn),
+defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXNREF:$exn),
                    (outs), (ins bb_op:$dst, event_op:$tag), [],
                    "br_on_exn \t$dst, $tag, $exn", "br_on_exn \t$dst, $tag",
                    0x0a>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
deleted file mode 100644
index 33a4f2519545d..0000000000000
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td
+++ /dev/null
@@ -1,26 +0,0 @@
-// WebAssemblyInstrExceptRef.td-WebAssembly except_ref codegen --*- tablegen -*-
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// WebAssembly except_ref operand code-gen constructs.
-///
-//===----------------------------------------------------------------------===//
-
-defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst),
-                           (ins EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond),
-                           (outs), (ins),
-                           [(set EXCEPT_REF:$dst,
-                            (select I32:$cond, EXCEPT_REF:$lhs,
-                             EXCEPT_REF:$rhs))],
-                           "except_ref.select\t$dst, $lhs, $rhs, $cond",
-                           "except_ref.select", 0x1b>;
-
-def : Pat<(select (i32 (setne I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
-          (SELECT_EXCEPT_REF EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond)>;
-def : Pat<(select (i32 (seteq I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs),
-          (SELECT_EXCEPT_REF EXCEPT_REF:$rhs, EXCEPT_REF:$lhs, I32:$cond)>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index ee6981135526b..d7022ce0bfba3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -192,7 +192,7 @@ unsigned WebAssemblyInstrInfo::insertBranch(
   MachineFunction &MF = *MBB.getParent();
   auto &MRI = MF.getRegInfo();
   bool IsBrOnExn = Cond[1].isReg() && MRI.getRegClass(Cond[1].getReg()) ==
-                                          &WebAssembly::EXCEPT_REFRegClass;
+                                          &WebAssembly::EXNREFRegClass;
 
   if (Cond[0].getImm()) {
     if (IsBrOnExn) {
@@ -222,7 +222,7 @@ bool WebAssemblyInstrInfo::reverseBranchCondition(
   MachineFunction &MF = *Cond[1].getParent()->getParent()->getParent();
   auto &MRI = MF.getRegInfo();
   if (Cond[1].isReg() &&
-      MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXCEPT_REFRegClass)
+      MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXNREFRegClass)
     return true;
 
   Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm());
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 859cb9bcdfd97..73ddbe85d5511 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -224,7 +224,7 @@ defm "": ARGUMENT<I32, i32>;
 defm "": ARGUMENT<I64, i64>;
 defm "": ARGUMENT<F32, f32>;
 defm "": ARGUMENT<F64, f64>;
-defm "": ARGUMENT<EXCEPT_REF, ExceptRef>;
+defm "": ARGUMENT<EXNREF, exnref>;
 
 // local.get and local.set are not generated by instruction selection; they
 // are implied by virtual register uses and defs.
@@ -294,7 +294,7 @@ defm "" : LOCAL<I64>;
 defm "" : LOCAL<F32>;
 defm "" : LOCAL<F64>;
 defm "" : LOCAL<V128>, Requires<[HasSIMD128]>;
-defm "" : LOCAL<EXCEPT_REF>, Requires<[HasExceptionHandling]>;
+defm "" : LOCAL<EXNREF>, Requires<[HasExceptionHandling]>;
 
 let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
 defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm),
@@ -345,5 +345,5 @@ include "WebAssemblyInstrConv.td"
 include "WebAssemblyInstrFloat.td"
 include "WebAssemblyInstrAtomics.td"
 include "WebAssemblyInstrSIMD.td"
-include "WebAssemblyInstrExceptRef.td"
+include "WebAssemblyInstrRef.td"
 include "WebAssemblyInstrBulkMemory.td"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
new file mode 100644
index 0000000000000..afe89de60b361
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td
@@ -0,0 +1,25 @@
+// WebAssemblyInstrRef.td - WebAssembly reference type codegen --*- tablegen -*-
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// WebAssembly reference type operand codegen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+defm SELECT_EXNREF : I<(outs EXNREF:$dst),
+                       (ins EXNREF:$lhs, EXNREF:$rhs, I32:$cond),
+                       (outs), (ins),
+                       [(set EXNREF:$dst,
+                         (select I32:$cond, EXNREF:$lhs, EXNREF:$rhs))],
+                       "exnref.select\t$dst, $lhs, $rhs, $cond",
+                       "exnref.select", 0x1b>;
+
+def : Pat<(select (i32 (setne I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
+          (SELECT_EXNREF EXNREF:$lhs, EXNREF:$rhs, I32:$cond)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs),
+          (SELECT_EXNREF EXNREF:$rhs, EXNREF:$lhs, I32:$cond)>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
index 49258ded76622..e92b344302725 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp
@@ -131,8 +131,7 @@ bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) {
       auto InsertPos = MBB.begin();
       if (InsertPos->isEHLabel()) // EH pad starts with an EH label
         ++InsertPos;
-      unsigned DstReg =
-          MRI.createVirtualRegister(&WebAssembly::EXCEPT_REFRegClass);
+      unsigned DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass);
       BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(),
               TII.get(WebAssembly::CATCH), DstReg);
     }
@@ -209,23 +208,23 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables(
 }
 
 // Wasm uses 'br_on_exn' instruction to check the tag of an exception. It takes
-// except_ref type object returned by 'catch', and branches to the destination
-// if it matches a given tag. We currently use __cpp_exception symbol to
-// represent the tag for all C++ exceptions.
+// exnref type object returned by 'catch', and branches to the destination if it
+// matches a given tag. We currently use __cpp_exception symbol to represent the
+// tag for all C++ exceptions.
 //
 // block $l (result i32)
 //   ...
-//   ;; except_ref $e is on the stack at this point
+//   ;; exnref $e is on the stack at this point
 //   br_on_exn $l $e ;; branch to $l with $e's arguments
 //   ...
 // end
 // ;; Here we expect the extracted values are on top of the wasm value stack
 // ... Handle exception using values ...
 //
-// br_on_exn takes an except_ref object and branches if it matches the given
-// tag. There can be multiple br_on_exn instructions if we want to match for
-// another tag, but for now we only test for __cpp_exception tag, and if it does
-// not match, i.e., it is a foreign exception, we rethrow it.
+// br_on_exn takes an exnref object and branches if it matches the given tag.
+// There can be multiple br_on_exn instructions if we want to match for another
+// tag, but for now we only test for __cpp_exception tag, and if it does not
+// match, i.e., it is a foreign exception, we rethrow it.
 //
 // In the destination BB that's the target of br_on_exn, extracted exception
 // values (in C++'s case a single i32, which represents an exception pointer)
@@ -279,13 +278,13 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
 
     // - Before:
     // ehpad:
-    //   %exnref:except_ref = catch
+    //   %exnref:exnref = catch
     //   %exn:i32 = extract_exception
     //   ... use exn ...
     //
     // - After:
     // ehpad:
-    //   %exnref:except_ref = catch
+    //   %exnref:exnref = catch
     //   br_on_exn %thenbb, $__cpp_exception, %exnref
     //   br %elsebb
     // elsebb:
@@ -317,14 +316,14 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) {
     //
     // - Before:
     // ehpad:
-    //   %exnref:except_ref = catch
+    //   %exnref:exnref = catch
     //   %exn:i32 = extract_exception
     //   call @__clang_call_terminate(%exn)
     //   unreachable
     //
     // - After:
     // ehpad:
-    //   %exnref:except_ref = catch
+    //   %exnref:exnref = catch
     //   br_on_exn %thenbb, $__cpp_exception, %exnref
     //   br %elsebb
     // elsebb:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 31ba6f0e4c237..a120a6471014c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -834,9 +834,9 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
         // entering blocks, which is a part of multi-value proposal.
         //
         // Once we support live-in values of wasm blocks, this can be:
-        // catch                           ; push except_ref value onto stack
-        // block except_ref -> i32
-        // br_on_exn $__cpp_exception      ; pop the except_ref value
+        // catch                           ; push exnref value onto stack
+        // block exnref -> i32
+        // br_on_exn $__cpp_exception      ; pop the exnref value
         // end_block
         //
         // But because we don't support it yet, the catch instruction's dst
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 4d202f70caad7..6d3d6c723277d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -43,7 +43,7 @@ def F64_0 : WebAssemblyReg<"%f64.0">;
 
 def V128_0: WebAssemblyReg<"%v128">;
 
-def EXCEPT_REF_0 : WebAssemblyReg<"%except_ref.0">;
+def EXNREF_0 : WebAssemblyReg<"%exnref.0">;
 
 // The value stack "register". This is an opaque entity which serves to order
 // uses and defs that must remain in LIFO order.
@@ -64,4 +64,4 @@ def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
 def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
 def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128,
                                (add V128_0)>;
-def EXCEPT_REF : WebAssemblyRegClass<[ExceptRef], 0, (add EXCEPT_REF_0)>;
+def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>;
diff --git a/llvm/test/CodeGen/WebAssembly/exception.ll b/llvm/test/CodeGen/WebAssembly/exception.ll
index 8f7687e2da485..fc61f4099e0ed 100644
--- a/llvm/test/CodeGen/WebAssembly/exception.ll
+++ b/llvm/test/CodeGen/WebAssembly/exception.ll
@@ -31,11 +31,11 @@ define void @test_throw(i8* %p) {
 ; CHECK:     global.get  ${{.+}}=, __stack_pointer
 ; CHECK:     try
 ; CHECK:       call      foo
-; CHECK:     catch     $[[EXCEPT_REF:[0-9]+]]=
+; CHECK:     catch     $[[EXNREF:[0-9]+]]=
 ; CHECK:       global.set  __stack_pointer
 ; CHECK:       block i32
-; CHECK:         br_on_exn 0, __cpp_exception, $[[EXCEPT_REF]]
-; CHECK:         rethrow   $[[EXCEPT_REF]]
+; CHECK:         br_on_exn 0, __cpp_exception, $[[EXNREF]]
+; CHECK:         rethrow   $[[EXNREF]]
 ; CHECK:       end_block
 ; CHECK:       extract_exception $[[EXN:[0-9]+]]=
 ; CHECK-DAG:   i32.store  __wasm_lpad_context
@@ -47,7 +47,7 @@ define void @test_throw(i8* %p) {
 ; CHECK:         call      __cxa_end_catch
 ; CHECK:         br        1
 ; CHECK:       end_block
-; CHECK:       rethrow   $[[EXCEPT_REF]]
+; CHECK:       rethrow   $[[EXNREF]]
 ; CHECK:     end_try
 define void @test_catch() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
 entry:
@@ -92,10 +92,10 @@ try.cont:                                         ; preds = %entry, %catch
 ; CHECK-LABEL: test_cleanup:
 ; CHECK: try
 ; CHECK:   call      foo
-; CHECK: catch     $[[EXCEPT_REF:[0-9]+]]=
+; CHECK: catch     $[[EXNREF:[0-9]+]]=
 ; CHECK:   global.set  __stack_pointer
 ; CHECK:   i32.call  $drop=, _ZN4TempD2Ev
-; CHECK:   rethrow   $[[EXCEPT_REF]]
+; CHECK:   rethrow   $[[EXNREF]]
 ; CHECK: end_try
 define void @test_cleanup() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) {
 entry:
diff --git a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt
index e979bac69128c..08cc95434c16d 100644
--- a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt
+++ b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt
@@ -23,7 +23,7 @@
 0x11 0x80 0x01 0x00
 
 # CHECK: call 0
-# CHECK-NOT: except_ref.call 0
+# CHECK-NOT: exnref.call 0
 0x10 0x00
 
 # CHECK: local.get 128
diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s
index c3b7e9da25de4..23b1a0940f637 100644
--- a/llvm/test/MC/WebAssembly/basic-assembly.s
+++ b/llvm/test/MC/WebAssembly/basic-assembly.s
@@ -70,7 +70,7 @@ test0:
     # TODO: enable once instruction has been added.
     #i32x4.trunc_sat_f32x4_s
     i32.trunc_f32_s
-    try         except_ref
+    try         exnref
     i32.atomic.load 0
     atomic.notify 0
 .LBB0_3:
@@ -172,7 +172,7 @@ test0:
 # CHECK-NEXT:      end_if
 # CHECK-NEXT:      f32x4.add
 # CHECK-NEXT:      i32.trunc_f32_s
-# CHECK-NEXT:      try         except_ref
+# CHECK-NEXT:      try         exnref
 # CHECK-NEXT:      i32.atomic.load 0
 # CHECK-NEXT:      atomic.notify 0
 # CHECK-NEXT:  .LBB0_3:
diff --git a/llvm/test/MC/WebAssembly/objdump.s b/llvm/test/MC/WebAssembly/objdump.s
index f1cedc7db86e9..4030ba9c2c76f 100644
--- a/llvm/test/MC/WebAssembly/objdump.s
+++ b/llvm/test/MC/WebAssembly/objdump.s
@@ -9,7 +9,7 @@ test0:
 
 test1:
     .functype   test1 (i32, i64) -> (i32)
-    .local      i32, i64, except_ref
+    .local      i32, i64, exnref
     local.get   3
     end_function
 
@@ -21,6 +21,6 @@ test1:
 # CHECK-NEXT:       9:       20 02  local.get	2
 # CHECK-NEXT:       b:       0b     end
 # CHECK-LABEL: test1:
-# CHECK-NEXT:        .local  i32, i64, except_ref
+# CHECK-NEXT:        .local  i32, i64, exnref
 # CHECK-NEXT:      14:       20 03  local.get	3
 # CHECK-NEXT:      16:       0b     end
diff --git a/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp b/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp
index ec946379b1e9e..49a469bdef789 100644
--- a/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp
+++ b/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp
@@ -100,14 +100,14 @@ body: |
   ; predecessors: %bb.0
     successors: %bb.3, %bb.9
     liveins: $value_stack
-    %0:except_ref = CATCH implicit-def $arguments
+    %0:exnref = CATCH implicit-def $arguments
     CLEANUPRET implicit-def dead $arguments
 
   bb.3 (landing-pad):
   ; predecessors: %bb.2
     successors: %bb.4, %bb.6
     liveins: $value_stack
-    %1:except_ref = CATCH implicit-def $arguments
+    %1:exnref = CATCH implicit-def $arguments
     BR_IF %bb.4, %58:i32, implicit-def $arguments, implicit-def $value_stack, implicit $value_stack
     BR %bb.6, implicit-def $arguments
 
@@ -138,13 +138,13 @@ body: |
   ; predecessors: %bb.4
     successors: %bb.9
     liveins: $value_stack
-    %2:except_ref = CATCH implicit-def $arguments
+    %2:exnref = CATCH implicit-def $arguments
     CLEANUPRET implicit-def dead $arguments
 
   bb.9 (landing-pad):
   ; predecessors: %bb.2, %bb.6, %bb.8
     liveins: $value_stack
-    %3:except_ref = CATCH implicit-def $arguments
+    %3:exnref = CATCH implicit-def $arguments
     CLEANUPRET implicit-def dead $arguments
 
   bb.10:
@@ -257,7 +257,7 @@ body: |
   ; predecessors: %bb.0
     successors: %bb.2, %bb.8
     liveins: $value_stack
-    %0:except_ref = CATCH implicit-def $arguments
+    %0:exnref = CATCH implicit-def $arguments
     BR_IF %bb.2, %32:i32, implicit-def $arguments, implicit-def $value_stack, implicit $value_stack
     BR %bb.8, implicit-def $arguments
 
@@ -271,7 +271,7 @@ body: |
   ; predecessors: %bb.2
     successors: %bb.4, %bb.6
     liveins: $value_stack
-    %1:except_ref = CATCH implicit-def $arguments
+    %1:exnref = CATCH implicit-def $arguments
     BR_IF %bb.4, %43:i32, implicit-def $arguments, implicit-def $value_stack, implicit $value_stack
     BR %bb.6, implicit-def $arguments
 
@@ -313,13 +313,13 @@ body: |
   ; predecessors: %bb.4
     successors: %bb.11
     liveins: $value_stack
-    %2:except_ref = CATCH implicit-def $arguments
+    %2:exnref = CATCH implicit-def $arguments
     CLEANUPRET implicit-def dead $arguments
 
   bb.11 (landing-pad):
   ; predecessors: %bb.2, %bb.6, %bb.10
     liveins: $value_stack
-    %3:except_ref = CATCH implicit-def $arguments
+    %3:exnref = CATCH implicit-def $arguments
     CLEANUPRET implicit-def dead $arguments
 
   bb.12:
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index dc45b1d5d35b8..702317283f908 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -191,7 +191,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::iPTR:     return "MVT::iPTR";
   case MVT::iPTRAny:  return "MVT::iPTRAny";
   case MVT::Untyped:  return "MVT::Untyped";
-  case MVT::ExceptRef: return "MVT::ExceptRef";
+  case MVT::exnref:   return "MVT::exnref";
   default: llvm_unreachable("ILLEGAL VALUE TYPE!");
   }
 }

From 199f8721e6a17242ded2f7a0e11211f2d6a69f75 Mon Sep 17 00:00:00 2001
From: Bob Haarman <llvm@inglorion.net>
Date: Mon, 15 Jul 2019 22:50:04 +0000
Subject: [PATCH 178/451] add -fthinlto-index= option to clang-cl

Summary:
This adds a -fthinlto-index= option to clang-cl, which allows it to
be used to drive ThinLTO backend passes. This allows clang-cl to be
used for distributed ThinLTO.

Reviewers: tejohnson, pcc, rnk

Subscribers: mehdi_amini, steven_wu, dexonsmith, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64458

llvm-svn: 366146
---
 clang/include/clang/Driver/Options.td  | 2 +-
 clang/test/Driver/cl-thinlto-backend.c | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Driver/cl-thinlto-backend.c

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 957483c318647..dfd27fab796e3 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1270,7 +1270,7 @@ def flto_jobs_EQ : Joined<["-"], "flto-jobs=">,
            "of 0 means the number of threads will be derived from "
            "the number of CPUs detected)">;
 def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">,
-  Flags<[CC1Option]>, Group<f_Group>,
+  Flags<[CoreOption, CC1Option]>, Group<f_Group>,
   HelpText<"Perform ThinLTO importing using provided function summary index">;
 def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
                                 Group<f_Group>, Flags<[DriverOption, CoreOption]>;
diff --git a/clang/test/Driver/cl-thinlto-backend.c b/clang/test/Driver/cl-thinlto-backend.c
new file mode 100644
index 0000000000000..a948c4ea33d9c
--- /dev/null
+++ b/clang/test/Driver/cl-thinlto-backend.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cl -c -flto=thin -Fo%t.obj %s
+// RUN: llvm-lto2 run -thinlto-distributed-indexes -o %t.exe %t.obj
+
+// -fthinlto-index= should be passed to cc1
+// RUN: %clang_cl -### -c -fthinlto-index=%t.thinlto.bc -Fo%t1.obj \
+// RUN:     %t.obj 2>&1 | FileCheck %s
+
+// CHECK: -fthinlto-index=
+// CHECK: "-x" "ir"

From a28dcf693d15119cf8be96ce66f97bdf8d373eb6 Mon Sep 17 00:00:00 2001
From: Yuanfang Chen <yuanfang.chen@sony.com>
Date: Mon, 15 Jul 2019 22:52:01 +0000
Subject: [PATCH 179/451] [llvm-readelf] Print "File: lib.a(file.o)" info when
 dumping archive files.

Match GNU readelf.

https://bugs.llvm.org/show_bug.cgi?id=35351

Reviewers: jhenderson, grimar, MaskRay, rupprecht

Reviewed by: jhenderson, MaskRay, grimar

Differential Revision: https://reviews.llvm.org/D64361

llvm-svn: 366147
---
 llvm/test/tools/llvm-readobj/archive.test     | 25 +++++++++++++++----
 .../llvm-readobj/elf-dynamic-malformed.test   |  1 +
 .../macho-universal-x86_64.i386.test          | 18 ++++++++-----
 .../llvm-readobj/thin-archive-paths.test      |  8 +++---
 llvm/tools/llvm-readobj/llvm-readobj.cpp      | 17 +++++++++----
 5 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/archive.test b/llvm/test/tools/llvm-readobj/archive.test
index 08b04fa4f03a0..1bb70d36270f3 100644
--- a/llvm/test/tools/llvm-readobj/archive.test
+++ b/llvm/test/tools/llvm-readobj/archive.test
@@ -2,39 +2,54 @@
 
 # RUN: rm -f %t.a
 # RUN: llvm-ar rc %t.a %p/Inputs/trivial.obj.elf-x86-64 %p/Inputs/trivial.obj.elf-i386 %p/Inputs/trivial.obj.coff-arm
-# RUN: llvm-readobj --all %t.a | FileCheck %s --check-prefixes=LLVM,COFF
-# RUN: llvm-readelf --all %t.a | FileCheck %s --check-prefixes=GNU,COFF
+# RUN: llvm-readobj --all %t.a | FileCheck %s -DARFILE="%t.a" --check-prefixes=HEADER,LLVM,COFF
+# RUN: llvm-readelf --all %t.a | FileCheck %s -DARFILE="%t.a" --check-prefixes=HEADER,GNU,COFF
 
-# LLVM: File: trivial.obj.elf-x86-64
+# LLVM: {{^$}}
+# LLVM-NEXT: File: [[ARFILE]](trivial.obj.elf-x86-64)
 # LLVM: Format: ELF64-x86-64
 # LLVM: ElfHeader {
 # LLVM:   Machine: EM_X86_64
 # LLVM: Sections [
 # LLVM: Relocations [
 # LLVM: Symbols [
+# LLVM: Notes [
+# LLVM: ]
 
+# GNU: {{^$}}
+# GNU-NEXT: File: [[ARFILE]](trivial.obj.elf-x86-64)
 # GNU: ELF Header:
 # GNU:   Machine: Advanced Micro Devices X86-64
 # GNU: Section Headers:
 # GNU: Relocation section '.rela.text'
 # GNU: Symbol table '.symtab'
+# GNU: There are no section groups in this file.
 
-# LLVM: File: trivial.obj.elf-i386
+# LLVM-EMPTY:
+# LLVM-NEXT: File: [[ARFILE]](trivial.obj.elf-i386)
 # LLVM: Format: ELF32-i386
 # LLVM: ElfHeader {
 # LLVM:   Machine: EM_386
 # LLVM: Sections [
 # LLVM: Relocations [
 # LLVM: Symbols [
+# LLVM: Notes [
+# LLVM: ]
 
+# GNU-EMPTY:
+# GNU-NEXT: File: [[ARFILE]](trivial.obj.elf-i386)
 # GNU: ELF Header:
 # GNU:   Machine: Intel 80386
 # GNU: Section Headers:
 # GNU: Relocation section '.rel.text'
 # GNU: Symbol table '.symtab'
+# GNU: There are no section groups in this file.
 
-# LLVM: File: trivial.obj.coff-arm
+# LLVM-EMPTY:
+# LLVM-NEXT: File: [[ARFILE]](trivial.obj.coff-arm)
 # LLVM: Format: COFF-ARM
+# GNU-EMPTY:
+# GNU-NEXT: File: [[ARFILE]](trivial.obj.coff-arm)
 # COFF: ImageFileHeader {
 # COFF:   Machine: IMAGE_FILE_MACHINE_ARMNT
 # COFF: Sections [
diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
index c8aa8d58eec92..e78e1affd5558 100644
--- a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
+++ b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test
@@ -20,6 +20,7 @@
 
 # WARN-GNU-NOT:  warning
 # WARN-GNU:      warning: invalid section size (4) or entity size (16)
+# WARN-GNU-EMPTY:
 # WARN-GNU-NEXT: ELF Header:
 # WARN-GNU:      Symbol table '.symtab' contains 1 entries:
 # WARN-GNU:        0:
diff --git a/llvm/test/tools/llvm-readobj/macho-universal-x86_64.i386.test b/llvm/test/tools/llvm-readobj/macho-universal-x86_64.i386.test
index b13bd455b175f..dc03ab367b14e 100644
--- a/llvm/test/tools/llvm-readobj/macho-universal-x86_64.i386.test
+++ b/llvm/test/tools/llvm-readobj/macho-universal-x86_64.i386.test
@@ -5,10 +5,12 @@ RUN: llvm-readobj --sections %p/Inputs/macho-universal.x86_64.i386 \
 RUN: | FileCheck %s -check-prefix MULTISECTIONS
 
 RUN: llvm-readobj -h %p/Inputs/macho-universal-archive.x86_64.i386 \
-RUN: | FileCheck %s -check-prefix MULTIHEADER-ARCHIVE
+RUN: | FileCheck %s -check-prefix MULTIHEADER-ARCHIVE \
+RUN:                -DARFILE="%p/Inputs/macho-universal-archive.x86_64.i386"
 
 RUN: llvm-readobj --sections %p/Inputs/macho-universal-archive.x86_64.i386 \
-RUN: | FileCheck %s -check-prefix MULTISECTIONS-ARCHIVE
+RUN: | FileCheck %s -check-prefix MULTISECTIONS-ARCHIVE \
+RUN:                -DARFILE="%p/Inputs/macho-universal-archive.x86_64.i386"
 
 MULTIHEADER: Format: Mach-O 64-bit x86-64
 MULTIHEADER: Arch: x86_64
@@ -146,7 +148,8 @@ MULTISECTIONS:     Reserved2: 0x0
 MULTISECTIONS:   }
 MULTISECTIONS: ]
 
-MULTIHEADER-ARCHIVE: File: hello.o
+MULTIHEADER-ARCHIVE: {{^$}}
+MULTIHEADER-ARCHIVE-NEXT: File: [[ARFILE]](hello.o)
 MULTIHEADER-ARCHIVE: Format: Mach-O 64-bit x86-64
 MULTIHEADER-ARCHIVE: Arch: x86_64
 MULTIHEADER-ARCHIVE: AddressSize: 64bit
@@ -162,7 +165,8 @@ MULTIHEADER-ARCHIVE:     MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
 MULTIHEADER-ARCHIVE:   ]
 MULTIHEADER-ARCHIVE:   Reserved: 0x0
 MULTIHEADER-ARCHIVE: }
-MULTIHEADER-ARCHIVE: File: foo.o
+MULTIHEADER-ARCHIVE-EMPTY:
+MULTIHEADER-ARCHIVE-NEXT: File: [[ARFILE]](foo.o)
 MULTIHEADER-ARCHIVE: Format: Mach-O 32-bit i386
 MULTIHEADER-ARCHIVE: Arch: i386
 MULTIHEADER-ARCHIVE: AddressSize: 32bit
@@ -178,7 +182,8 @@ MULTIHEADER-ARCHIVE:     MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
 MULTIHEADER-ARCHIVE:   ]
 MULTIHEADER-ARCHIVE: }
 
-MULTISECTIONS-ARCHIVE: File: hello.o
+MULTISECTIONS-ARCHIVE: {{^$}}
+MULTISECTIONS-ARCHIVE-NEXT: File: [[ARFILE]](hello.o)
 MULTISECTIONS-ARCHIVE: Format: Mach-O 64-bit x86-64
 MULTISECTIONS-ARCHIVE: Arch: x86_64
 MULTISECTIONS-ARCHIVE: AddressSize: 64bit
@@ -254,7 +259,8 @@ MULTISECTIONS-ARCHIVE:     Reserved1: 0x0
 MULTISECTIONS-ARCHIVE:     Reserved2: 0x0
 MULTISECTIONS-ARCHIVE:   }
 MULTISECTIONS-ARCHIVE: ]
-MULTISECTIONS-ARCHIVE: File: foo.o
+MULTISECTIONS-ARCHIVE-EMPTY:
+MULTISECTIONS-ARCHIVE-NEXT: File: [[ARFILE]](foo.o)
 MULTISECTIONS-ARCHIVE: Format: Mach-O 32-bit i386
 MULTISECTIONS-ARCHIVE: Arch: i386
 MULTISECTIONS-ARCHIVE: AddressSize: 32bit
diff --git a/llvm/test/tools/llvm-readobj/thin-archive-paths.test b/llvm/test/tools/llvm-readobj/thin-archive-paths.test
index d7a971eb303d8..2e2ec56b99da5 100644
--- a/llvm/test/tools/llvm-readobj/thin-archive-paths.test
+++ b/llvm/test/tools/llvm-readobj/thin-archive-paths.test
@@ -8,8 +8,8 @@
 # RUN: llvm-ar rcT a/relative.a a/b/1.o
 
 # Show that relative paths in the file header printing look sensible.
-# RUN: llvm-readobj --file-headers a/relative.a | FileCheck %s --check-prefix=REL
-# REL: File: b/1.o
+# RUN: llvm-readobj --file-headers a/relative.a | FileCheck %s -DARFILE="a/relative.a" --check-prefix=REL
+# REL: File: [[ARFILE]](b/1.o)
 
 # Show that relative paths in an error message for both archive and member look
 # sensible.
@@ -23,8 +23,8 @@
 # RUN: llvm-ar rcT c/absolute.a %t/a/b/1.o
 
 # Show that absolute paths in the file header printing are correct.
-# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DDIR=%/t
-# ABS: File: [[DIR]]/a/b/1.o
+# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DARFILE="c/absolute.a" -DDIR=%/t
+# ABS: File: [[ARFILE]]([[DIR]]/a/b/1.o)
 
 # Show that absolute paths in an error message for both archive and member are correct.
 # RUN: rm a/b/1.o
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp
index f00d94ee5c42c..b6d0493af700d 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -462,20 +462,27 @@ static std::error_code createDumper(const ObjectFile *Obj,
 }
 
 /// Dumps the specified object file.
-static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) {
+static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer,
+                       const Archive *A = nullptr) {
+  std::string FileStr =
+          A ? Twine(A->getFileName() + "(" + Obj->getFileName() + ")").str()
+            : Obj->getFileName().str();
+
   std::unique_ptr<ObjDumper> Dumper;
   if (std::error_code EC = createDumper(Obj, Writer, Dumper))
-    reportError(Obj->getFileName(), EC);
+    reportError(FileStr, EC);
 
+  Writer.startLine() << "\n";
   if (opts::Output == opts::LLVM) {
-    Writer.startLine() << "\n";
-    Writer.printString("File", Obj->getFileName());
+    Writer.printString("File", FileStr);
     Writer.printString("Format", Obj->getFileFormatName());
     Writer.printString("Arch", Triple::getArchTypeName(
                                    (llvm::Triple::ArchType)Obj->getArch()));
     Writer.printString("AddressSize",
                        formatv("{0}bit", 8 * Obj->getBytesInAddress()));
     Dumper->printLoadName();
+  } else if (opts::Output == opts::GNU && A) {
+    Writer.printString("File", FileStr);
   }
 
   if (opts::FileHeaders)
@@ -589,7 +596,7 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) {
       continue;
     }
     if (ObjectFile *Obj = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
-      dumpObject(Obj, Writer);
+      dumpObject(Obj, Writer, Arc);
     else if (COFFImportFile *Imp = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
       dumpCOFFImportFile(Imp, Writer);
     else

From b5701710a4297040b8d80eaf444d560aeba0867c Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Mon, 15 Jul 2019 22:56:12 +0000
Subject: [PATCH 180/451] [LanguageRuntime] Move ObjCLanguageRuntime into a
 plugin

Summary:
Following up on my CPPLanguageRuntime change, I'm moving
ObjCLanguageRuntime into a plugin as well.

Reviewers: JDevlieghere, compnerd, jingham, clayborg

Subscribers: mgorny, arphaman, lldb-commits

Differential Revision: https://reviews.llvm.org/D64763

llvm-svn: 366148
---
 .../DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp    | 3 ++-
 .../MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp              | 3 ++-
 .../Plugins/ExpressionParser/Clang/ClangASTSource.cpp    | 3 ++-
 .../ExpressionParser/Clang/ClangExpressionDeclMap.cpp    | 2 +-
 .../ExpressionParser/Clang/ClangExpressionParser.cpp     | 3 ++-
 .../Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp   | 3 ++-
 lldb/source/Plugins/Language/ObjC/CF.cpp                 | 3 ++-
 lldb/source/Plugins/Language/ObjC/Cocoa.cpp              | 1 -
 lldb/source/Plugins/Language/ObjC/Cocoa.h                | 3 ++-
 lldb/source/Plugins/Language/ObjC/NSArray.cpp            | 2 +-
 lldb/source/Plugins/Language/ObjC/NSDictionary.cpp       | 1 -
 lldb/source/Plugins/Language/ObjC/NSError.cpp            | 2 +-
 lldb/source/Plugins/Language/ObjC/NSException.cpp        | 2 +-
 lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp        | 2 +-
 lldb/source/Plugins/Language/ObjC/NSSet.cpp              | 1 -
 lldb/source/Plugins/Language/ObjC/NSString.h             | 3 ++-
 lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp       | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h   | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp        | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h          | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCRuntime.h             | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h           | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp         | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h           | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp | 3 ++-
 .../ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h  | 3 ++-
 .../AppleThreadPlanStepThroughObjCTrampoline.cpp         | 3 ++-
 lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt  | 9 +++++++++
 .../LanguageRuntime/ObjC}/ObjCLanguageRuntime.cpp        | 3 ++-
 .../Plugins/LanguageRuntime/ObjC}/ObjCLanguageRuntime.h  | 0
 lldb/source/Symbol/CMakeLists.txt                        | 1 +
 lldb/source/Symbol/ClangASTContext.cpp                   | 2 +-
 lldb/source/Target/CMakeLists.txt                        | 1 -
 33 files changed, 56 insertions(+), 30 deletions(-)
 rename lldb/source/{Target => Plugins/LanguageRuntime/ObjC}/ObjCLanguageRuntime.cpp (99%)
 rename lldb/{include/lldb/Target => source/Plugins/LanguageRuntime/ObjC}/ObjCLanguageRuntime.h (100%)

diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
index ce12157688361..57d87eb145eb9 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
@@ -20,7 +20,6 @@
 #include "lldb/Symbol/Function.h"
 #include "lldb/Symbol/ObjectFile.h"
 #include "lldb/Target/ABI.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/Target/StackFrame.h"
 #include "lldb/Target/Target.h"
@@ -32,6 +31,8 @@
 #include "lldb/Utility/Log.h"
 #include "lldb/Utility/State.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 //#define ENABLE_DEBUG_PRINTF // COMMENT THIS LINE OUT PRIOR TO CHECKIN
 #ifdef ENABLE_DEBUG_PRINTF
 #include <stdio.h>
diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp
index 7d00380bfcd45..53424f018c52f 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp
@@ -16,7 +16,6 @@
 #include "lldb/Symbol/Function.h"
 #include "lldb/Symbol/ObjectFile.h"
 #include "lldb/Target/ABI.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/Target/StackFrame.h"
 #include "lldb/Target/Target.h"
@@ -30,6 +29,8 @@
 #include "DynamicLoaderDarwin.h"
 #include "DynamicLoaderMacOSXDYLD.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 //#define ENABLE_DEBUG_PRINTF // COMMENT THIS LINE OUT PRIOR TO CHECKIN
 #ifdef ENABLE_DEBUG_PRINTF
 #include <stdio.h>
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
index a616a1de0c6da..8d29df9dde2db 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
@@ -20,12 +20,13 @@
 #include "lldb/Symbol/SymbolFile.h"
 #include "lldb/Symbol/SymbolVendor.h"
 #include "lldb/Symbol/TaggedASTType.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/Log.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/RecordLayout.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 #include <memory>
 #include <vector>
 
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
index a1b8f4f0011fc..a49a7029e0d28 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp
@@ -33,7 +33,6 @@
 #include "lldb/Symbol/Variable.h"
 #include "lldb/Symbol/VariableList.h"
 #include "lldb/Target/ExecutionContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/Target/StackFrame.h"
@@ -53,6 +52,7 @@
 
 #include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h"
 #include "Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.h"
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
 
 using namespace lldb;
 using namespace lldb_private;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
index 1c7f931898666..7d13891ded8d2 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp
@@ -78,7 +78,6 @@
 #include "lldb/Symbol/SymbolVendor.h"
 #include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/Language.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Target/ThreadPlanCallFunction.h"
@@ -90,6 +89,8 @@
 #include "lldb/Utility/StreamString.h"
 #include "lldb/Utility/StringList.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 #include <cctype>
 #include <memory>
 
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp
index 6d34a35ba2bc1..f8e004fe7d4ad 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp
@@ -18,13 +18,14 @@
 
 #include "lldb/Expression/UtilityFunction.h"
 #include "lldb/Target/ExecutionContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/StackFrame.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/ConstString.h"
 #include "lldb/Utility/Log.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 using namespace llvm;
 using namespace lldb_private;
 
diff --git a/lldb/source/Plugins/Language/ObjC/CF.cpp b/lldb/source/Plugins/Language/ObjC/CF.cpp
index d9b6881565374..5bca260616ea8 100644
--- a/lldb/source/Plugins/Language/ObjC/CF.cpp
+++ b/lldb/source/Plugins/Language/ObjC/CF.cpp
@@ -14,7 +14,6 @@
 #include "lldb/DataFormatters/FormattersHelpers.h"
 #include "lldb/Symbol/ClangASTContext.h"
 #include "lldb/Target/Language.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/StackFrame.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataBufferHeap.h"
@@ -22,6 +21,8 @@
 #include "lldb/Utility/Status.h"
 #include "lldb/Utility/Stream.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 using namespace lldb;
 using namespace lldb_private;
 using namespace lldb_private::formatters;
diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp
index 6c9d024a13213..ddf3953bb512a 100644
--- a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp
+++ b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp
@@ -17,7 +17,6 @@
 #include "lldb/Host/Time.h"
 #include "lldb/Symbol/ClangASTContext.h"
 #include "lldb/Target/Language.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/ProcessStructReader.h"
 #include "lldb/Target/Target.h"
diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.h b/lldb/source/Plugins/Language/ObjC/Cocoa.h
index 819b0ceb0e0f1..388e6f03aa0f9 100644
--- a/lldb/source/Plugins/Language/ObjC/Cocoa.h
+++ b/lldb/source/Plugins/Language/ObjC/Cocoa.h
@@ -13,9 +13,10 @@
 #include "lldb/Core/ValueObject.h"
 #include "lldb/DataFormatters/TypeSummary.h"
 #include "lldb/DataFormatters/TypeSynthetic.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Utility/Stream.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 namespace lldb_private {
 namespace formatters {
 bool NSIndexSetSummaryProvider(ValueObject &valobj, Stream &stream,
diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp
index b981b373cf272..404dabf2870c0 100644
--- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp
@@ -11,13 +11,13 @@
 #include "Cocoa.h"
 
 #include "Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h"
+
 #include "lldb/Core/ValueObject.h"
 #include "lldb/Core/ValueObjectConstResult.h"
 #include "lldb/DataFormatters/FormattersHelpers.h"
 #include "lldb/Expression/FunctionCaller.h"
 #include "lldb/Symbol/ClangASTContext.h"
 #include "lldb/Target/Language.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataBufferHeap.h"
 #include "lldb/Utility/Endian.h"
diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
index 601d777b13711..10f66c4a37f80 100644
--- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp
@@ -19,7 +19,6 @@
 #include "lldb/DataFormatters/FormattersHelpers.h"
 #include "lldb/Symbol/ClangASTContext.h"
 #include "lldb/Target/Language.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/StackFrame.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataBufferHeap.h"
diff --git a/lldb/source/Plugins/Language/ObjC/NSError.cpp b/lldb/source/Plugins/Language/ObjC/NSError.cpp
index 3804a71e40dce..97df3be72c843 100644
--- a/lldb/source/Plugins/Language/ObjC/NSError.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSError.cpp
@@ -14,7 +14,6 @@
 #include "lldb/Core/ValueObjectConstResult.h"
 #include "lldb/DataFormatters/FormattersHelpers.h"
 #include "lldb/Symbol/ClangASTContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/ProcessStructReader.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataBufferHeap.h"
@@ -23,6 +22,7 @@
 #include "lldb/Utility/Stream.h"
 
 #include "Plugins/Language/ObjC/NSString.h"
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
 
 using namespace lldb;
 using namespace lldb_private;
diff --git a/lldb/source/Plugins/Language/ObjC/NSException.cpp b/lldb/source/Plugins/Language/ObjC/NSException.cpp
index eea34e61d47b0..931794a12ab17 100644
--- a/lldb/source/Plugins/Language/ObjC/NSException.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSException.cpp
@@ -14,7 +14,6 @@
 #include "lldb/Core/ValueObjectConstResult.h"
 #include "lldb/DataFormatters/FormattersHelpers.h"
 #include "lldb/Symbol/ClangASTContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/ProcessStructReader.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataBufferHeap.h"
@@ -23,6 +22,7 @@
 #include "lldb/Utility/Stream.h"
 
 #include "Plugins/Language/ObjC/NSString.h"
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
 
 using namespace lldb;
 using namespace lldb_private;
diff --git a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp
index a15650fdb08b2..9ee6021ae56bd 100644
--- a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp
@@ -13,10 +13,10 @@
 #include "lldb/DataFormatters/FormattersHelpers.h"
 #include "lldb/DataFormatters/TypeSynthetic.h"
 #include "lldb/Symbol/ClangASTContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/Target.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
 using namespace lldb;
 using namespace lldb_private;
 using namespace lldb_private::formatters;
diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp
index f201526deef1a..ebaa990fb74b2 100644
--- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp
+++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp
@@ -14,7 +14,6 @@
 #include "lldb/DataFormatters/FormattersHelpers.h"
 #include "lldb/Symbol/ClangASTContext.h"
 #include "lldb/Target/Language.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataBufferHeap.h"
 #include "lldb/Utility/Endian.h"
diff --git a/lldb/source/Plugins/Language/ObjC/NSString.h b/lldb/source/Plugins/Language/ObjC/NSString.h
index 33052d47d56e9..699d8eb36f882 100644
--- a/lldb/source/Plugins/Language/ObjC/NSString.h
+++ b/lldb/source/Plugins/Language/ObjC/NSString.h
@@ -12,9 +12,10 @@
 
 #include "lldb/Core/ValueObject.h"
 #include "lldb/DataFormatters/TypeSummary.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Utility/Stream.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 namespace lldb_private {
 namespace formatters {
 bool NSStringSummaryProvider(ValueObject &valobj, Stream &stream,
diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
index fc0c933e13f74..f9ab18688de72 100644
--- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
+++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp
@@ -16,13 +16,14 @@
 #include "lldb/DataFormatters/FormattersHelpers.h"
 #include "lldb/Symbol/ClangASTContext.h"
 #include "lldb/Symbol/CompilerType.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/ConstString.h"
 #include "lldb/Utility/StreamString.h"
 
 #include "llvm/Support/Threading.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 #include "CF.h"
 #include "Cocoa.h"
 #include "CoreMedia.h"
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
index 7738531c71a14..b8ba9dbb65f45 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h
@@ -12,9 +12,10 @@
 #include <mutex>
 
 #include "AppleObjCRuntimeV2.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/lldb-private.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 namespace lldb_private {
 
 class ClassDescriptorV2 : public ObjCLanguageRuntime::ClassDescriptor {
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
index 501114ad02810..18f2a1829a419 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp
@@ -9,10 +9,10 @@
 #include "AppleObjCDeclVendor.h"
 
 #include "Plugins/ExpressionParser/Clang/ASTDumper.h"
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
 #include "lldb/Core/Module.h"
 #include "lldb/Symbol/ClangExternalASTSourceCommon.h"
 #include "lldb/Symbol/ClangUtil.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/Log.h"
@@ -20,6 +20,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclObjC.h"
 
+
 using namespace lldb_private;
 
 class lldb_private::AppleObjCExternalASTSource
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h
index 41e211f3bb984..77b30b7fde791 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h
@@ -11,9 +11,10 @@
 
 #include "lldb/Symbol/ClangASTContext.h"
 #include "lldb/Symbol/DeclVendor.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/lldb-private.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 namespace lldb_private {
 
 class AppleObjCExternalASTSource;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h
index 694230e52d3c6..79ac53e1e440a 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h
@@ -14,9 +14,10 @@
 #include "AppleObjCTrampolineHandler.h"
 #include "AppleThreadPlanStepThroughObjCTrampoline.h"
 #include "lldb/Target/LanguageRuntime.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/lldb-private.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 namespace lldb_private {
 
 class AppleObjCRuntime : public lldb_private::ObjCLanguageRuntime {
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h
index f012439364c1f..6fdae63d4126e 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h
@@ -10,9 +10,10 @@
 #define liblldb_AppleObjCRuntimeV1_h_
 
 #include "AppleObjCRuntime.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/lldb-private.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 namespace lldb_private {
 
 class AppleObjCRuntimeV1 : public AppleObjCRuntime {
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
index 38a4f9e4094e5..635eaff637bcb 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp
@@ -42,7 +42,6 @@
 #include "lldb/Symbol/VariableList.h"
 #include "lldb/Target/ABI.h"
 #include "lldb/Target/ExecutionContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Platform.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/RegisterContext.h"
@@ -66,6 +65,8 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclObjC.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 #include <vector>
 
 using namespace lldb;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h
index 358f9a9181fab..a0fd39dc03b20 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h
@@ -14,9 +14,10 @@
 #include <mutex>
 
 #include "AppleObjCRuntime.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/lldb-private.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 class RemoteNXMapTable;
 
 namespace lldb_private {
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
index 654dbf0e2409d..b3eb09caa86dc 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
@@ -23,7 +23,6 @@
 #include "lldb/Symbol/Symbol.h"
 #include "lldb/Target/ABI.h"
 #include "lldb/Target/ExecutionContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/Target/Target.h"
@@ -35,6 +34,8 @@
 
 #include "llvm/ADT/STLExtras.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 #include <memory>
 
 using namespace lldb;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h
index ade96dc4170d4..e576e8f283f20 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h
@@ -11,9 +11,10 @@
 
 #include "clang/AST/ASTContext.h"
 
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/lldb-private.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 namespace lldb_utility {
 class StringLexer;
 }
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
index 12b637bd9d0c2..d18435c9c6dbc 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp
@@ -13,13 +13,14 @@
 #include "lldb/Expression/FunctionCaller.h"
 #include "lldb/Expression/UtilityFunction.h"
 #include "lldb/Target/ExecutionContext.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/Thread.h"
 #include "lldb/Target/ThreadPlanRunToAddress.h"
 #include "lldb/Target/ThreadPlanStepOut.h"
 #include "lldb/Utility/Log.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
+
 #include <memory>
 
 using namespace lldb;
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt b/lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt
index af13dc6a144de..5b3ea2ff27fad 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt
@@ -1 +1,10 @@
+add_lldb_library(lldbPluginObjCRuntime PLUGIN
+  ObjCLanguageRuntime.cpp
+
+  LINK_LIBS
+    lldbCore
+    lldbSymbol
+    lldbTarget
+    lldbUtility
+)
 add_subdirectory(AppleObjCRuntime)
diff --git a/lldb/source/Target/ObjCLanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp
similarity index 99%
rename from lldb/source/Target/ObjCLanguageRuntime.cpp
rename to lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp
index 8e5d144e04192..631c15c46ce85 100644
--- a/lldb/source/Target/ObjCLanguageRuntime.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 #include "clang/AST/Type.h"
 
+#include "ObjCLanguageRuntime.h"
+
 #include "lldb/Core/MappedHash.h"
 #include "lldb/Core/Module.h"
 #include "lldb/Core/PluginManager.h"
@@ -17,7 +19,6 @@
 #include "lldb/Symbol/Type.h"
 #include "lldb/Symbol/TypeList.h"
 #include "lldb/Symbol/Variable.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/Log.h"
 #include "lldb/Utility/Timer.h"
diff --git a/lldb/include/lldb/Target/ObjCLanguageRuntime.h b/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h
similarity index 100%
rename from lldb/include/lldb/Target/ObjCLanguageRuntime.h
rename to lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h
diff --git a/lldb/source/Symbol/CMakeLists.txt b/lldb/source/Symbol/CMakeLists.txt
index 96ccc25220acb..4b922c2178661 100644
--- a/lldb/source/Symbol/CMakeLists.txt
+++ b/lldb/source/Symbol/CMakeLists.txt
@@ -59,6 +59,7 @@ add_lldb_library(lldbSymbol
     lldbPluginSymbolFileDWARF
     lldbPluginSymbolFilePDB
     lldbPluginObjCLanguage
+    lldbPluginObjCRuntime
 
   LINK_COMPONENTS
     Support
diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp
index 2d400476548e1..f85c5d2b9e05d 100644
--- a/lldb/source/Symbol/ClangASTContext.cpp
+++ b/lldb/source/Symbol/ClangASTContext.cpp
@@ -86,7 +86,6 @@
 #include "lldb/Symbol/VerifyDecl.h"
 #include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/Language.h"
-#include "lldb/Target/ObjCLanguageRuntime.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataExtractor.h"
@@ -95,6 +94,7 @@
 #include "lldb/Utility/RegularExpression.h"
 #include "lldb/Utility/Scalar.h"
 
+#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
 #include "Plugins/SymbolFile/DWARF/DWARFASTParserClang.h"
 #include "Plugins/SymbolFile/PDB/PDBASTParser.h"
 
diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt
index af3d05b98b65b..2f59a4851b8fb 100644
--- a/lldb/source/Target/CMakeLists.txt
+++ b/lldb/source/Target/CMakeLists.txt
@@ -10,7 +10,6 @@ add_lldb_library(lldbTarget
   Memory.cpp
   MemoryHistory.cpp
   ModuleCache.cpp
-  ObjCLanguageRuntime.cpp
   OperatingSystem.cpp
   PathMappingList.cpp
   Platform.cpp

From 1cf6922660187c93f9203ea63e90b02514f08e79 Mon Sep 17 00:00:00 2001
From: Heejin Ahn <aheejin@gmail.com>
Date: Mon, 15 Jul 2019 23:04:00 +0000
Subject: [PATCH 181/451] [WebAssembly] Add missing utility methods for exnref
 type

Summary:
This adds missing utility methods and copy instruction handling for
`exnref` type and also adds tests.

`tee` instruction tests are missing because `isTee` is currently only
used in ExplicitLocals pass and testing that pass in mir requires
serialization of stackified registers in mir files, which is a bit
nontrivial because `MachineFunctionInfo` only has info of vreg numbers
(which are large integers) but not the mir's register numbers. But this
change is quite trivial anyway.

Reviewers: tlively

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64705

llvm-svn: 366149
---
 .../MCTargetDesc/WebAssemblyMCTargetDesc.h    |  4 ++
 .../WebAssembly/WebAssemblyInstrInfo.cpp      |  2 +
 .../test/CodeGen/WebAssembly/reg-argument.mir | 59 +++++++++++++++++++
 llvm/test/CodeGen/WebAssembly/reg-copy.mir    | 11 ++++
 4 files changed, 76 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/reg-argument.mir

diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 31ad88b3549c7..7a9f59b1a4f2c 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -385,6 +385,8 @@ inline bool isArgument(unsigned Opc) {
   case WebAssembly::ARGUMENT_v4f32_S:
   case WebAssembly::ARGUMENT_v2f64:
   case WebAssembly::ARGUMENT_v2f64_S:
+  case WebAssembly::ARGUMENT_exnref:
+  case WebAssembly::ARGUMENT_exnref_S:
     return true;
   default:
     return false;
@@ -423,6 +425,8 @@ inline bool isTee(unsigned Opc) {
   case WebAssembly::TEE_F64_S:
   case WebAssembly::TEE_V128:
   case WebAssembly::TEE_V128_S:
+  case WebAssembly::TEE_EXNREF:
+  case WebAssembly::TEE_EXNREF_S:
     return true;
   default:
     return false;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index d7022ce0bfba3..a86c9af28f0d7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -75,6 +75,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     CopyOpcode = WebAssembly::COPY_F64;
   else if (RC == &WebAssembly::V128RegClass)
     CopyOpcode = WebAssembly::COPY_V128;
+  else if (RC == &WebAssembly::EXNREFRegClass)
+    CopyOpcode = WebAssembly::COPY_EXNREF;
   else
     llvm_unreachable("Unexpected register class");
 
diff --git a/llvm/test/CodeGen/WebAssembly/reg-argument.mir b/llvm/test/CodeGen/WebAssembly/reg-argument.mir
new file mode 100644
index 0000000000000..70c033f7f8f0f
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/reg-argument.mir
@@ -0,0 +1,59 @@
+# RUN: llc -mtriple=wasm32-unknown-unknown %s -o - -run-pass wasm-argument-move  | FileCheck %s
+
+# wasm-argument-move pass moves all ARGUMENT instructions to the top of the
+# entry BB.
+---
+name: argument_i32
+# CHECK-LABEL: argument_i32
+body: |
+  ; CHECK-LABEL: bb.0:
+  ; CHECK-NEXT: %1:i32 = ARGUMENT_i32 0
+  bb.0:
+    %0:i32 = CONST_I32 0, implicit-def $arguments
+    %1:i32 = ARGUMENT_i32 0, implicit $arguments
+    RETURN_VOID implicit-def $arguments
+...
+---
+name: argument_i64
+# CHECK-LABEL: argument_i64
+body: |
+  ; CHECK-LABEL: bb.0:
+  ; CHECK-NEXT: %1:i64 = ARGUMENT_i64 0
+  bb.0:
+    %0:i32 = CONST_I32 0, implicit-def $arguments
+    %1:i64 = ARGUMENT_i64 0, implicit $arguments
+    RETURN_VOID implicit-def $arguments
+...
+---
+name: argument_f32
+# CHECK-LABEL: argument_f32
+body: |
+  ; CHECK-LABEL: bb.0:
+  ; CHECK-NEXT: %1:f32 = ARGUMENT_f32 0
+  bb.0:
+    %0:i32 = CONST_I32 0, implicit-def $arguments
+    %1:f32 = ARGUMENT_f32 0, implicit $arguments
+    RETURN_VOID implicit-def $arguments
+...
+---
+name: argument_f64
+# CHECK-LABEL: argument_f64
+body: |
+  ; CHECK-LABEL: bb.0:
+  ; CHECK-NEXT: %1:f64 = ARGUMENT_f64 0
+  bb.0:
+    %0:i32 = CONST_I32 0, implicit-def $arguments
+    %1:f64 = ARGUMENT_f64 0, implicit $arguments
+    RETURN_VOID implicit-def $arguments
+...
+---
+name: argument_exnref
+# CHECK-LABEL: argument_exnref
+body: |
+  ; CHECK-LABEL: bb.0:
+  ; CHECK-NEXT: %1:exnref = ARGUMENT_exnref 0
+  bb.0:
+    %0:i32 = CONST_I32 0, implicit-def $arguments
+    %1:exnref = ARGUMENT_exnref 0, implicit $arguments
+    RETURN_VOID implicit-def $arguments
+...
diff --git a/llvm/test/CodeGen/WebAssembly/reg-copy.mir b/llvm/test/CodeGen/WebAssembly/reg-copy.mir
index 0a362699b8143..a077c347efdaa 100644
--- a/llvm/test/CodeGen/WebAssembly/reg-copy.mir
+++ b/llvm/test/CodeGen/WebAssembly/reg-copy.mir
@@ -55,3 +55,14 @@ body: |
     %0:v128 = COPY %1:v128
     RETURN_VOID implicit-def $arguments
 ...
+---
+name: copy_exnref
+# CHECK-LABEL: copy_exnref
+body: |
+  ; CHECK-LABEL: bb.0:
+  ; CHECK-NEXT: %0:exnref = COPY_EXNREF %1:exnref
+  ; CHECK-NEXT: RETURN_VOID
+  bb.0:
+    %0:exnref = COPY %1:exnref
+    RETURN_VOID implicit-def $arguments
+...

From c48162db994ab6040c45d468ea95772b574ab3ef Mon Sep 17 00:00:00 2001
From: Julian Lettner <jlettner@apple.com>
Date: Mon, 15 Jul 2019 23:05:14 +0000
Subject: [PATCH 182/451] [TSan] Fix asm token error (again)

llvm-svn: 366150
---
 compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
index 8e4ddc969e058..c387416c20dd7 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
@@ -421,7 +421,7 @@ static void InitializeLongjmpXorKey() {
 
   // 2. Retrieve vanilla/mangled SP.
   uptr sp;
-  asm("mov  %0, %sp" : "=r" (sp));
+  asm("mov  %0, sp" : "=r" (sp));
   uptr mangled_sp = ((uptr *)&env)[LONG_JMP_SP_ENV_SLOT];
 
   // 3. xor SPs to obtain key.

From 51193871dafd99e79d7d19f62cffbdcdda238530 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Mon, 15 Jul 2019 23:07:56 +0000
Subject: [PATCH 183/451] [X86] Teach convertToThreeAddress to handle SUB with
 immediate

We mostly avoid sub with immediate but there are a couple of cases that can create them. One is the add 128, %rax -> sub -128, %rax trick in isel. The other is when a SUB immediate gets created for a compare where both the flags and the subtract value are used. If we are unable to linearize the SelectionDAG to satisfy the flag user and the sub result user from the same instruction, we will clone the sub immediate for the two uses. The one that produces flags will eventually become a compare. The other will have its flag output dead, and could then be considered for LEA creation.

I added additional test cases to add.ll to show that the sub -128 trick gets converted to LEA and a case where we don't need to convert it.

This showed up in the current codegen for PR42571.

Differential Revision: https://reviews.llvm.org/D64574

llvm-svn: 366151
---
 llvm/lib/Target/X86/X86InstrArithmetic.td     | 15 ++++---
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 45 +++++++++++++++++++
 llvm/test/CodeGen/X86/add.ll                  | 14 +++---
 .../X86/bmi-intrinsics-fast-isel-x86_64.ll    | 12 ++---
 .../CodeGen/X86/bmi-intrinsics-fast-isel.ll   | 28 +++++-------
 llvm/test/CodeGen/X86/cgp-usubo.ll            |  4 +-
 .../X86/tbm-intrinsics-fast-isel-x86_64.ll    |  3 +-
 .../CodeGen/X86/tbm-intrinsics-fast-isel.ll   |  7 ++-
 8 files changed, 81 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index fbdc55cb02556..e52635f8d48b9 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -924,11 +924,12 @@ class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
 multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
                          string mnemonic, Format RegMRM, Format MemMRM,
                          SDNode opnodeflag, SDNode opnode,
-                         bit CommutableRR, bit ConvertibleToThreeAddress> {
+                         bit CommutableRR, bit ConvertibleToThreeAddress,
+                         bit ConvertibleToThreeAddressRR> {
   let Defs = [EFLAGS] in {
     let Constraints = "$src1 = $dst" in {
       let isCommutable = CommutableRR in {
-        let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+        let isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in {
           def NAME#8rr  : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>;
           def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>;
           def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>;
@@ -1169,16 +1170,16 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
 
 
 defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
-                         X86and_flag, and, 1, 0>;
+                         X86and_flag, and, 1, 0, 0>;
 defm OR  : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
-                         X86or_flag, or, 1, 0>;
+                         X86or_flag, or, 1, 0, 0>;
 defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
-                         X86xor_flag, xor, 1, 0>;
+                         X86xor_flag, xor, 1, 0, 0>;
 defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
-                         X86add_flag, add, 1, 1>;
+                         X86add_flag, add, 1, 1, 1>;
 let isCompare = 1 in {
 defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
-                         X86sub_flag, sub, 0, 0>;
+                         X86sub_flag, sub, 0, 1, 0>;
 }
 
 // Arithmetic.
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index e5d3a09c291b9..dbe45356c42bf 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1078,6 +1078,51 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::ADD16ri_DB:
   case X86::ADD16ri8_DB:
     return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+  case X86::SUB8ri:
+  case X86::SUB16ri8:
+  case X86::SUB16ri:
+    /// FIXME: Support these similar to ADD8ri/ADD16ri*.
+    return nullptr;
+  case X86::SUB32ri8:
+  case X86::SUB32ri: {
+    int64_t Imm = MI.getOperand(2).getImm();
+    if (!isInt<32>(-Imm))
+      return nullptr;
+
+    assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
+    unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
+
+    bool isKill;
+    unsigned SrcReg;
+    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
+                        SrcReg, isKill, ImplicitOp, LV))
+      return nullptr;
+
+    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+                                  .add(Dest)
+                                  .addReg(SrcReg, getKillRegState(isKill));
+    if (ImplicitOp.getReg() != 0)
+      MIB.add(ImplicitOp);
+
+    NewMI = addOffset(MIB, -Imm);
+    break;
+  }
+
+  case X86::SUB64ri8:
+  case X86::SUB64ri32: {
+    int64_t Imm = MI.getOperand(2).getImm();
+    if (!isInt<32>(-Imm))
+      return nullptr;
+
+    assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
+
+    MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(),
+                                      get(X86::LEA64r)).add(Dest).add(Src);
+    NewMI = addOffset(MIB, -Imm);
+    break;
+  }
+
   case X86::VMOVDQU8Z128rmk:
   case X86::VMOVDQU8Z256rmk:
   case X86::VMOVDQU8Zrmk:
diff --git a/llvm/test/CodeGen/X86/add.ll b/llvm/test/CodeGen/X86/add.ll
index e9516b871048d..1662562bd013f 100644
--- a/llvm/test/CodeGen/X86/add.ll
+++ b/llvm/test/CodeGen/X86/add.ll
@@ -16,14 +16,14 @@ define i32 @test1(i32 inreg %a) nounwind {
 ;
 ; X64-LINUX-LABEL: test1:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    movl %edi, %eax
-; X64-LINUX-NEXT:    subl $-128, %eax
+; X64-LINUX-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-LINUX-NEXT:    leal 128(%rdi), %eax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test1:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    movl %ecx, %eax
-; X64-WIN32-NEXT:    subl $-128, %eax
+; X64-WIN32-NEXT:    # kill: def $ecx killed $ecx def $rcx
+; X64-WIN32-NEXT:    leal 128(%rcx), %eax
 ; X64-WIN32-NEXT:    retq
 entry:
   %b = add i32 %a, 128
@@ -86,14 +86,12 @@ define i64 @test3(i64 inreg %a) nounwind {
 ;
 ; X64-LINUX-LABEL: test3:
 ; X64-LINUX:       # %bb.0: # %entry
-; X64-LINUX-NEXT:    movq %rdi, %rax
-; X64-LINUX-NEXT:    subq $-128, %rax
+; X64-LINUX-NEXT:    leaq 128(%rdi), %rax
 ; X64-LINUX-NEXT:    retq
 ;
 ; X64-WIN32-LABEL: test3:
 ; X64-WIN32:       # %bb.0: # %entry
-; X64-WIN32-NEXT:    movq %rcx, %rax
-; X64-WIN32-NEXT:    subq $-128, %rax
+; X64-WIN32-NEXT:    leaq 128(%rcx), %rax
 ; X64-WIN32-NEXT:    retq
 entry:
   %b = add i64 %a, 128
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
index 872233f51ad48..d704f38307fcb 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
@@ -43,8 +43,7 @@ define i64 @test__blsi_u64(i64 %a0) {
 define i64 @test__blsmsk_u64(i64 %a0) {
 ; X64-LABEL: test__blsmsk_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    xorq %rdi, %rax
 ; X64-NEXT:    retq
   %dec = sub i64 %a0, 1
@@ -55,8 +54,7 @@ define i64 @test__blsmsk_u64(i64 %a0) {
 define i64 @test__blsr_u64(i64 %a0) {
 ; X64-LABEL: test__blsr_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    andq %rdi, %rax
 ; X64-NEXT:    retq
   %dec = sub i64 %a0, 1
@@ -124,8 +122,7 @@ define i64 @test_blsi_u64(i64 %a0) {
 define i64 @test_blsmsk_u64(i64 %a0) {
 ; X64-LABEL: test_blsmsk_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    xorq %rdi, %rax
 ; X64-NEXT:    retq
   %dec = sub i64 %a0, 1
@@ -136,8 +133,7 @@ define i64 @test_blsmsk_u64(i64 %a0) {
 define i64 @test_blsr_u64(i64 %a0) {
 ; X64-LABEL: test_blsr_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    andq %rdi, %rax
 ; X64-NEXT:    retq
   %dec = sub i64 %a0, 1
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
index ced1585bd71b4..c6950da4064d1 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
@@ -82,15 +82,14 @@ define i32 @test__blsmsk_u32(i32 %a0) {
 ; X32-LABEL: test__blsmsk_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    xorl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test__blsmsk_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    xorl %edi, %eax
 ; X64-NEXT:    retq
   %dec = sub i32 %a0, 1
@@ -102,15 +101,14 @@ define i32 @test__blsr_u32(i32 %a0) {
 ; X32-LABEL: test__blsr_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    andl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test__blsr_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %dec = sub i32 %a0, 1
@@ -224,15 +222,14 @@ define i32 @test_blsmsk_u32(i32 %a0) {
 ; X32-LABEL: test_blsmsk_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    xorl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_blsmsk_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    xorl %edi, %eax
 ; X64-NEXT:    retq
   %dec = sub i32 %a0, 1
@@ -244,15 +241,14 @@ define i32 @test_blsr_u32(i32 %a0) {
 ; X32-LABEL: test_blsr_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    andl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_blsr_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    retq
   %dec = sub i32 %a0, 1
diff --git a/llvm/test/CodeGen/X86/cgp-usubo.ll b/llvm/test/CodeGen/X86/cgp-usubo.ll
index 6733a8258f8d6..ab82d9809724a 100644
--- a/llvm/test/CodeGen/X86/cgp-usubo.ll
+++ b/llvm/test/CodeGen/X86/cgp-usubo.ll
@@ -246,8 +246,8 @@ exit:
 define i32 @PR42571(i32 %x, i32 %y) {
 ; CHECK-LABEL: PR42571:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    subl $1, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal -1(%rdi), %eax
 ; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    cmpl $1, %edi
 ; CHECK-NEXT:    cmovbl %esi, %eax
diff --git a/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
index 82b3b4c3624b8..35c14697cf967 100644
--- a/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll
@@ -76,8 +76,7 @@ define i64 @test__blcs_u64(i64 %a0) {
 define i64 @test__blsfill_u64(i64 %a0) {
 ; X64-LABEL: test__blsfill_u64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    subq $1, %rax
+; X64-NEXT:    leaq -1(%rdi), %rax
 ; X64-NEXT:    orq %rdi, %rax
 ; X64-NEXT:    retq
   %1 = sub i64 %a0, 1
diff --git a/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
index 0664d043e1163..55fe9b8b3c0c1 100644
--- a/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll
@@ -125,15 +125,14 @@ define i32 @test__blsfill_u32(i32 %a0) {
 ; X32-LABEL: test__blsfill_u32:
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    subl $1, %eax
+; X32-NEXT:    leal -1(%ecx), %eax
 ; X32-NEXT:    orl %ecx, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test__blsfill_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    subl $1, %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal -1(%rdi), %eax
 ; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %1 = sub i32 %a0, 1

From 60a0d49e77cf6583b749ad6189751cd5d31bf3ee Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Mon, 15 Jul 2019 23:14:00 +0000
Subject: [PATCH 184/451] [DirectoryWatcher][linux] Fix for older kernels

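IN_EXCL_UNLINK has only been available since Linux 2.6.36, so pass it to
inotify_add_watch() only when the kernel headers used for the build are
at least that version.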

Differential Revision: https://reviews.llvm.org/D64764

llvm-svn: 366152
---
 .../DirectoryWatcher/linux/DirectoryWatcher-linux.cpp    | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
index 0c9f799b638d6..6d7d69da4db5a 100644
--- a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
+++ b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp
@@ -24,6 +24,7 @@
 #include <vector>
 
 #include <fcntl.h>
+#include <linux/version.h>
 #include <sys/epoll.h>
 #include <sys/inotify.h>
 #include <unistd.h>
@@ -332,8 +333,12 @@ std::unique_ptr<DirectoryWatcher> clang::DirectoryWatcher::create(
 
   const int InotifyWD = inotify_add_watch(
       InotifyFD, Path.str().c_str(),
-      IN_CREATE | IN_DELETE | IN_DELETE_SELF | IN_EXCL_UNLINK | IN_MODIFY |
-          IN_MOVED_FROM | IN_MOVE_SELF | IN_MOVED_TO | IN_ONLYDIR | IN_IGNORED);
+      IN_CREATE | IN_DELETE | IN_DELETE_SELF | IN_MODIFY |
+      IN_MOVED_FROM | IN_MOVE_SELF | IN_MOVED_TO | IN_ONLYDIR | IN_IGNORED
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
+      | IN_EXCL_UNLINK
+#endif
+      );
   if (InotifyWD == -1)
     return nullptr;
 

From bb147aabc68c366cff4ac5f1713b7b138a3b0fe0 Mon Sep 17 00:00:00 2001
From: Leonard Chan <leonardchan@google.com>
Date: Mon, 15 Jul 2019 23:18:31 +0000
Subject: [PATCH 185/451] Revert "[NewPM] Port Sancov"

This reverts commit 5652f35817f07b16f8b3856d594cc42f4d7ee29c.

llvm-svn: 366153
---
 clang/lib/CodeGen/BackendUtil.cpp             |  47 +--
 clang/test/CodeGen/sancov-new-pm.c            |  41 --
 llvm/include/llvm/InitializePasses.h          |   4 +-
 .../include/llvm/Transforms/Instrumentation.h |   4 +
 .../Instrumentation/SanitizerCoverage.h       |  62 ---
 llvm/lib/Passes/PassBuilder.cpp               |   3 +-
 llvm/lib/Passes/PassRegistry.def              |   2 -
 .../Instrumentation/Instrumentation.cpp       |   2 +-
 .../Instrumentation/SanitizerCoverage.cpp     | 359 ++++++------------
 .../SanitizerCoverage/abort-in-entry-block.ll |   1 -
 .../SanitizerCoverage/backedge-pruning.ll     |   2 -
 .../SanitizerCoverage/chains.ll               |   1 -
 .../cmp-tracing-api-x86_32.ll                 |  25 +-
 .../cmp-tracing-api-x86_64.ll                 |  25 +-
 .../SanitizerCoverage/cmp-tracing.ll          |   1 -
 .../SanitizerCoverage/coff-comdat.ll          |   1 -
 .../coff-pc-table-inline-8bit-counters.ll     |   1 -
 .../SanitizerCoverage/coff-used-ctor.ll       |   3 +-
 .../SanitizerCoverage/const-cmp-tracing.ll    |   1 -
 .../SanitizerCoverage/coverage-dbg.ll         |   1 -
 .../SanitizerCoverage/coverage.ll             |   4 +-
 .../SanitizerCoverage/coverage2-dbg.ll        |   1 -
 .../SanitizerCoverage/div-tracing.ll          |   1 -
 .../SanitizerCoverage/gep-tracing.ll          |   1 -
 .../SanitizerCoverage/inline-8bit-counters.ll |   1 -
 .../interposable-symbol-nocomdat.ll           |   2 -
 .../SanitizerCoverage/no-func.ll              |   1 -
 .../SanitizerCoverage/pc-table.ll             |   2 -
 .../SanitizerCoverage/postdominator_check.ll  |   2 -
 .../Instrumentation/SanitizerCoverage/seh.ll  |   3 -
 .../stack-depth-variable-declared-by-user.ll  |   2 -
 .../SanitizerCoverage/stack-depth.ll          |   5 -
 .../SanitizerCoverage/switch-tracing.ll       |   1 -
 .../trace-pc-guard-comdat.ll                  |   1 -
 .../trace-pc-guard-inline-8bit-counters.ll    |   1 -
 .../trace-pc-guard-nocomdat.ll                |   1 -
 .../SanitizerCoverage/tracing-comdat.ll       |   3 -
 .../SanitizerCoverage/tracing.ll              |   4 -
 .../SanitizerCoverage/unreachable-critedge.ll |   1 -
 .../SanitizerCoverage/wineh.ll                |   1 -
 40 files changed, 156 insertions(+), 468 deletions(-)
 delete mode 100644 clang/test/CodeGen/sancov-new-pm.c
 delete mode 100644 llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 8499af07dbb7f..40a529c319f4a 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -60,7 +60,6 @@
 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
-#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar.h"
@@ -196,8 +195,11 @@ static void addBoundsCheckingPass(const PassManagerBuilder &Builder,
   PM.add(createBoundsCheckingLegacyPass());
 }
 
-static SanitizerCoverageOptions
-getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) {
+static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
+                                     legacy::PassManagerBase &PM) {
+  const PassManagerBuilderWrapper &BuilderWrapper =
+      static_cast<const PassManagerBuilderWrapper&>(Builder);
+  const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
   SanitizerCoverageOptions Opts;
   Opts.CoverageType =
       static_cast<SanitizerCoverageOptions::Type>(CGOpts.SanitizeCoverageType);
@@ -213,17 +215,7 @@ getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) {
   Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters;
   Opts.PCTable = CGOpts.SanitizeCoveragePCTable;
   Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth;
-  return Opts;
-}
-
-static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
-                                     legacy::PassManagerBase &PM) {
-  const PassManagerBuilderWrapper &BuilderWrapper =
-      static_cast<const PassManagerBuilderWrapper &>(Builder);
-  const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
-  auto Opts = getSancovOptsFromCGOpts(CGOpts);
-  PM.add(createModuleSanitizerCoverageLegacyPassPass(Opts));
-  PM.add(createSanitizerCoverageLegacyPassPass(Opts));
+  PM.add(createSanitizerCoverageModulePass(Opts));
 }
 
 // Check if ASan should use GC-friendly instrumentation for globals.
@@ -1143,21 +1135,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
             EntryExitInstrumenterPass(/*PostInlining=*/false)));
       });
 
-      if (CodeGenOpts.SanitizeCoverageType ||
-          CodeGenOpts.SanitizeCoverageIndirectCalls ||
-          CodeGenOpts.SanitizeCoverageTraceCmp) {
-        auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
-        PB.registerPipelineStartEPCallback(
-            [SancovOpts](ModulePassManager &MPM) {
-              MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts));
-            });
-        PB.registerOptimizerLastEPCallback(
-            [SancovOpts](FunctionPassManager &FPM,
-                         PassBuilder::OptimizationLevel Level) {
-              FPM.addPass(SanitizerCoveragePass(SancovOpts));
-            });
-      }
-
       // Register callbacks to schedule sanitizer passes at the appropriate part of
       // the pipeline.
       // FIXME: either handle asan/the remaining sanitizers or error out
@@ -1242,18 +1219,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
       }
     }
 
-    if (CodeGenOpts.OptimizationLevel == 0) {
-      if (CodeGenOpts.SanitizeCoverageType ||
-          CodeGenOpts.SanitizeCoverageIndirectCalls ||
-          CodeGenOpts.SanitizeCoverageTraceCmp) {
-        auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
-        MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts));
-        MPM.addPass(createModuleToFunctionPassAdaptor(
-            SanitizerCoveragePass(SancovOpts)));
-      }
-
+    if (CodeGenOpts.OptimizationLevel == 0)
       addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts);
-    }
   }
 
   // FIXME: We still use the legacy pass manager to do code generation. We
diff --git a/clang/test/CodeGen/sancov-new-pm.c b/clang/test/CodeGen/sancov-new-pm.c
deleted file mode 100644
index 06d9042bc70a8..0000000000000
--- a/clang/test/CodeGen/sancov-new-pm.c
+++ /dev/null
@@ -1,41 +0,0 @@
-// Test that SanitizerCoverage works under the new pass manager.
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -O2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O2
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -flto -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -flto -O2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O2
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -flto=thin -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -flto=thin -O2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O2,CHECK-O2-THINLTO
-
-extern void *memcpy(void *, const void *, unsigned long);
-extern int printf(const char *restrict, ...);
-
-int LLVMFuzzerTestOneInput(const unsigned char *data, unsigned long size) {
-  unsigned char buf[4];
-
-  if (size < 8)
-    return 0;
-
-  if (data[0] == 'h' && data[1] == 'i' && data[2] == '!') {
-    memcpy(buf, data, size);
-    printf("test: %.2X\n", buf[0]);
-  }
-
-  return 0;
-}
-
-// CHECK-DAG: declare void @__sanitizer_cov_pcs_init(i64*, i64*)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_pc_indir(i64)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_cmp1(i8 zeroext, i8 zeroext)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_cmp2(i16 zeroext, i16 zeroext)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_cmp4(i32 zeroext, i32 zeroext)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_cmp8(i64, i64)
-// CHECK-O2-THINLTO-NOT: declare void @__sanitizer_cov_trace_const_cmp1(i8 zeroext, i8 zeroext)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_const_cmp2(i16 zeroext, i16 zeroext)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_const_cmp4(i32 zeroext, i32 zeroext)
-// CHECK-O2-THINLTO-NOT: declare void @__sanitizer_cov_trace_const_cmp8(i64, i64)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_div4(i32 zeroext)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_div8(i64)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_gep(i64)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_switch(i64, i64*)
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_pc()
-// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_pc_guard(i32*)
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 031df1f00e879..164d0be2855ad 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -360,9 +360,7 @@ void initializeSROALegacyPassPass(PassRegistry&);
 void initializeSafeStackLegacyPassPass(PassRegistry&);
 void initializeSafepointIRVerifierPass(PassRegistry&);
 void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
-void initializeSanitizerCoverageFunctionCheckLegacyPassPass(PassRegistry &);
-void initializeSanitizerCoverageLegacyPassPass(PassRegistry &);
-void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &);
+void initializeSanitizerCoverageModulePass(PassRegistry&);
 void initializeScalarEvolutionWrapperPassPass(PassRegistry&);
 void initializeScalarizeMaskedMemIntrinPass(PassRegistry&);
 void initializeScalarizerLegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h
index fcad1e11895fe..8b70d2926ae9e 100644
--- a/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation.h
@@ -181,6 +181,10 @@ struct SanitizerCoverageOptions {
   SanitizerCoverageOptions() = default;
 };
 
+// Insert SanitizerCoverage instrumentation.
+ModulePass *createSanitizerCoverageModulePass(
+    const SanitizerCoverageOptions &Options = SanitizerCoverageOptions());
+
 /// Calculate what to divide by to scale counts.
 ///
 /// Given the maximum count, calculate a divisor that will scale all the
diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
deleted file mode 100644
index bdc79b1a45244..0000000000000
--- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===--------- Definition of the SanitizerCoverage class --------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SanitizerCoverage class which is a port of the legacy
-// SanitizerCoverage pass to use the new PassManager infrastructure.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H
-#define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H
-
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/Instrumentation.h"
-
-namespace llvm {
-
-/// This is the SanitizerCoverage pass used in the new pass manager. The
-/// pass instruments functions for coverage.
-class SanitizerCoveragePass : public PassInfoMixin<SanitizerCoveragePass> {
-public:
-  explicit SanitizerCoveragePass(
-      SanitizerCoverageOptions Options = SanitizerCoverageOptions())
-      : Options(Options) {}
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
-  SanitizerCoverageOptions Options;
-};
-
-/// This is the ModuleSanitizerCoverage pass used in the new pass manager. This
-/// adds initialization calls to the module for trace PC guards and 8bit
-/// counters if they are requested.
-class ModuleSanitizerCoveragePass
-    : public PassInfoMixin<ModuleSanitizerCoveragePass> {
-public:
-  explicit ModuleSanitizerCoveragePass(
-      SanitizerCoverageOptions Options = SanitizerCoverageOptions())
-      : Options(Options) {}
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-
-private:
-  SanitizerCoverageOptions Options;
-};
-
-// Insert SanitizerCoverage instrumentation.
-FunctionPass *createSanitizerCoverageLegacyPassPass(
-    const SanitizerCoverageOptions &Options = SanitizerCoverageOptions());
-ModulePass *createModuleSanitizerCoverageLegacyPassPass(
-    const SanitizerCoverageOptions &Options = SanitizerCoverageOptions());
-
-} // namespace llvm
-
-#endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index eef94bf9012a2..e2b2a2b252684 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -101,7 +101,6 @@
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
-#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/Scalar/ADCE.h"
 #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
@@ -144,8 +143,8 @@
 #include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
 #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
-#include "llvm/Transforms/Scalar/MergeICmps.h"
 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
 #include "llvm/Transforms/Scalar/NaryReassociate.h"
 #include "llvm/Transforms/Scalar/NewGVN.h"
 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index b1b2304af8d6b..e785558d5a732 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -86,7 +86,6 @@ MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
 MODULE_PASS("verify", VerifierPass())
 MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
 MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
-MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
 MODULE_PASS("poison-checking", PoisonCheckingPass())
 #undef MODULE_PASS
 
@@ -246,7 +245,6 @@ FUNCTION_PASS("khwasan", HWAddressSanitizerPass(true, true))
 FUNCTION_PASS("msan", MemorySanitizerPass({}))
 FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
 FUNCTION_PASS("tsan", ThreadSanitizerPass())
-FUNCTION_PASS("sancov-func", SanitizerCoveragePass())
 #undef FUNCTION_PASS
 
 #ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 64bf51476612a..f56a1bd91b898 100644
--- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -116,7 +116,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
   initializeMemorySanitizerLegacyPassPass(Registry);
   initializeHWAddressSanitizerLegacyPassPass(Registry);
   initializeThreadSanitizerLegacyPassPass(Registry);
-  initializeSanitizerCoverageLegacyPassPass(Registry);
+  initializeSanitizerCoverageModulePass(Registry);
   initializeDataFlowSanitizerPass(Registry);
 }
 
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index b7c29d6d28b41..ca0cb4bdbe844 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -10,7 +10,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/EHPersonalities.h"
@@ -177,158 +176,24 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
   return Options;
 }
 
-bool canInstrumentWithSancov(const Function &F) {
-  if (F.empty())
-    return false;
-  if (F.getName().find(".module_ctor") != std::string::npos)
-    return false; // Should not instrument sanitizer init functions.
-  if (F.getName().startswith("__sanitizer_"))
-    return false; // Don't instrument __sanitizer_* callbacks.
-  // Don't touch available_externally functions, their actual body is elewhere.
-  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
-    return false;
-  // Don't instrument MSVC CRT configuration helpers. They may run before normal
-  // initialization.
-  if (F.getName() == "__local_stdio_printf_options" ||
-      F.getName() == "__local_stdio_scanf_options")
-    return false;
-  if (isa<UnreachableInst>(F.getEntryBlock().getTerminator()))
-    return false;
-  // Don't instrument functions using SEH for now. Splitting basic blocks like
-  // we do for coverage breaks WinEHPrepare.
-  // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
-  if (F.hasPersonalityFn() &&
-      isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
-    return false;
-  return true;
-}
-
-/// This is a class for instrumenting the module to add calls to initializing
-/// the trace PC guards and 8bit counter globals. This should only be done
-/// though if there is at least one function that can be instrumented with
-/// Sancov.
-class ModuleSanitizerCoverage {
+class SanitizerCoverageModule : public ModulePass {
 public:
-  ModuleSanitizerCoverage(const SanitizerCoverageOptions &Options)
-      : Options(OverrideFromCL(Options)) {}
-
-  bool instrumentModule(Module &M) {
-    if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
-      return false;
-
-    Function *Ctor = nullptr;
-    LLVMContext *C = &(M.getContext());
-    const DataLayout *DL = &M.getDataLayout();
-    TargetTriple = Triple(M.getTargetTriple());
-    IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
-    Type *IntptrPtrTy = PointerType::getUnqual(IntptrTy);
-    IRBuilder<> IRB(*C);
-    Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
-    Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
-    Int8Ty = IRB.getInt8Ty();
-
-    // Check that the __sancov_lowest_stack marker does not already exist.
-    Constant *SanCovLowestStackConstant =
-        M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
-    GlobalVariable *SanCovLowestStack =
-        dyn_cast<GlobalVariable>(SanCovLowestStackConstant);
-    if (!SanCovLowestStack) {
-      C->emitError(StringRef("'") + SanCovLowestStackName +
-                   "' should not be declared by the user");
-      return true;
-    }
-
-    // We want to emit guard init calls if the module contains a function that
-    // we can instrument with SanitizerCoverage. We ignore any functions that
-    // were inserted by SanitizerCoverage and get the result from the analysis
-    // that checks for a valid function that the analysis may have run over.
-    if (!llvm::any_of(
-            M, [](const Function &F) { return canInstrumentWithSancov(F); }))
-      return false;
-
-    // Emit the init calls.
-    if (Options.TracePCGuard)
-      Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName,
-                                        SanCovTracePCGuardInitName, Int32PtrTy,
-                                        SanCovGuardsSectionName);
-    if (Options.Inline8bitCounters)
-      Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName,
-                                        SanCov8bitCountersInitName, Int8PtrTy,
-                                        SanCovCountersSectionName);
-    if (Ctor && Options.PCTable) {
-      auto SecStartEnd =
-          CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy);
-      FunctionCallee InitFunction = declareSanitizerInitFunction(
-          M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
-      IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
-      IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
-    }
-    return Ctor;
+  SanitizerCoverageModule(
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
+      : ModulePass(ID), Options(OverrideFromCL(Options)) {
+    initializeSanitizerCoverageModulePass(*PassRegistry::getPassRegistry());
   }
+  bool runOnModule(Module &M) override;
+  bool runOnFunction(Function &F);
+  static char ID; // Pass identification, replacement for typeid
+  StringRef getPassName() const override { return "SanitizerCoverageModule"; }
 
-private:
-  Function *CreateInitCallsForSections(Module &M, const char *CtorName,
-                                       const char *InitFunctionName, Type *Ty,
-                                       const char *Section);
-  std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
-                                                Type *Ty);
-  std::string getSectionStart(const std::string &Section) const {
-    if (TargetTriple.isOSBinFormatMachO())
-      return "\1section$start$__DATA$__" + Section;
-    return "__start___" + Section;
-  }
-  std::string getSectionEnd(const std::string &Section) const {
-    if (TargetTriple.isOSBinFormatMachO())
-      return "\1section$end$__DATA$__" + Section;
-    return "__stop___" + Section;
-  }
-
-  SanitizerCoverageOptions Options;
-  Triple TargetTriple;
-  Type *IntptrTy, *Int8PtrTy, *Int8Ty;
-};
-
-class ModuleSanitizerCoverageLegacyPass : public ModulePass {
-public:
-  static char ID;
-
-  ModuleSanitizerCoverageLegacyPass(
-      SanitizerCoverageOptions Options = SanitizerCoverageOptions())
-      : ModulePass(ID), Options(Options) {
-    initializeModuleSanitizerCoverageLegacyPassPass(
-        *PassRegistry::getPassRegistry());
-  }
-
-  bool runOnModule(Module &M) override {
-    ModuleSanitizerCoverage ModuleSancov(Options);
-    return ModuleSancov.instrumentModule(M);
-  };
-
-  StringRef getPassName() const override {
-    return "ModuleSanitizerCoverageLegacyPass";
-  }
-
-private:
-  SanitizerCoverageOptions Options;
-};
-
-char ModuleSanitizerCoverageLegacyPass::ID = 0;
-
-class SanitizerCoverage {
-public:
-  SanitizerCoverage(Function &F, const SanitizerCoverageOptions &Options)
-      : CurModule(F.getParent()), Options(OverrideFromCL(Options)) {
-    initializeModule(*F.getParent());
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
   }
 
-  ~SanitizerCoverage() { finalizeModule(*CurModule); }
-
-  bool instrumentFunction(Function &F, const DominatorTree *DT,
-                          const PostDominatorTree *PDT);
-
 private:
-  void initializeModule(Module &M);
-  void finalizeModule(Module &M);
   void InjectCoverageForIndirectCalls(Function &F,
                                       ArrayRef<Instruction *> IndirCalls);
   void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
@@ -347,6 +212,11 @@ class SanitizerCoverage {
   void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
   void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
                              bool IsLeafFunc = true);
+  Function *CreateInitCallsForSections(Module &M, const char *CtorName,
+                                       const char *InitFunctionName, Type *Ty,
+                                       const char *Section);
+  std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
+                                                Type *Ty);
 
   void SetNoSanitizeMetadata(Instruction *I) {
     I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
@@ -354,6 +224,8 @@ class SanitizerCoverage {
   }
 
   std::string getSectionName(const std::string &Section) const;
+  std::string getSectionStart(const std::string &Section) const;
+  std::string getSectionEnd(const std::string &Section) const;
   FunctionCallee SanCovTracePCIndir;
   FunctionCallee SanCovTracePC, SanCovTracePCGuard;
   FunctionCallee SanCovTraceCmpFunction[4];
@@ -380,63 +252,10 @@ class SanitizerCoverage {
   SanitizerCoverageOptions Options;
 };
 
-class SanitizerCoverageLegacyPass : public FunctionPass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-
-  SanitizerCoverageLegacyPass(
-      SanitizerCoverageOptions Options = SanitizerCoverageOptions())
-      : FunctionPass(ID), Options(Options) {
-    initializeSanitizerCoverageLegacyPassPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnFunction(Function &F) override {
-    const DominatorTree *DT =
-        &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-    const PostDominatorTree *PDT =
-        &getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
-    SanitizerCoverage Sancov(F, Options);
-    return Sancov.instrumentFunction(F, DT, PDT);
-  }
-
-  StringRef getPassName() const override {
-    return "SanitizerCoverageLegacyPass";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    // Make the module sancov pass required by this pass so that it runs when
-    // -sancov is passed.
-    AU.addRequired<ModuleSanitizerCoverageLegacyPass>();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<PostDominatorTreeWrapperPass>();
-  }
-
-private:
-  SanitizerCoverageOptions Options;
-};
-
 } // namespace
 
-PreservedAnalyses SanitizerCoveragePass::run(Function &F,
-                                             FunctionAnalysisManager &AM) {
-  const DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
-  const PostDominatorTree *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
-  SanitizerCoverage Sancov(F, Options);
-  if (Sancov.instrumentFunction(F, DT, PDT))
-    return PreservedAnalyses::none();
-  return PreservedAnalyses::all();
-}
-
-PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M,
-                                                   ModuleAnalysisManager &AM) {
-  ModuleSanitizerCoverage ModuleSancov(Options);
-  if (ModuleSancov.instrumentModule(M))
-    return PreservedAnalyses::none();
-  return PreservedAnalyses::all();
-}
-
 std::pair<Value *, Value *>
-ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
+SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section,
                                            Type *Ty) {
   GlobalVariable *SecStart =
       new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
@@ -446,7 +265,6 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
       new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
                          nullptr, getSectionEnd(Section));
   SecEnd->setVisibility(GlobalValue::HiddenVisibility);
-
   IRBuilder<> IRB(M.getContext());
   Value *SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty);
   if (!TargetTriple.isOSBinFormatCOFF())
@@ -460,7 +278,7 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section,
   return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
 }
 
-Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
+Function *SanitizerCoverageModule::CreateInitCallsForSections(
     Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty,
     const char *Section) {
   auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
@@ -492,11 +310,12 @@ Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
   return CtorFunc;
 }
 
-void SanitizerCoverage::initializeModule(Module &M) {
+bool SanitizerCoverageModule::runOnModule(Module &M) {
   if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
-    return;
+    return false;
   C = &(M.getContext());
   DL = &M.getDataLayout();
+  CurModule = &M;
   CurModuleUniqueId = getUniqueModuleId(CurModule);
   TargetTriple = Triple(M.getTargetTriple());
   FunctionGuardArray = nullptr;
@@ -564,6 +383,11 @@ void SanitizerCoverage::initializeModule(Module &M) {
   Constant *SanCovLowestStackConstant =
       M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
   SanCovLowestStack = dyn_cast<GlobalVariable>(SanCovLowestStackConstant);
+  if (!SanCovLowestStack) {
+    C->emitError(StringRef("'") + SanCovLowestStackName +
+                 "' should not be declared by the user");
+    return true;
+  }
   SanCovLowestStack->setThreadLocalMode(
       GlobalValue::ThreadLocalMode::InitialExecTLSModel);
   if (Options.StackDepth && !SanCovLowestStack->isDeclaration())
@@ -577,14 +401,33 @@ void SanitizerCoverage::initializeModule(Module &M) {
   SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
   SanCovTracePCGuard =
       M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
-}
 
-void SanitizerCoverage::finalizeModule(Module &M) {
+  for (auto &F : M)
+    runOnFunction(F);
+
+  Function *Ctor = nullptr;
+
+  if (FunctionGuardArray)
+    Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName,
+                                      SanCovTracePCGuardInitName, Int32PtrTy,
+                                      SanCovGuardsSectionName);
+  if (Function8bitCounterArray)
+    Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName,
+                                      SanCov8bitCountersInitName, Int8PtrTy,
+                                      SanCovCountersSectionName);
+  if (Ctor && Options.PCTable) {
+    auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy);
+    FunctionCallee InitFunction = declareSanitizerInitFunction(
+        M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
+    IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
+    IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
+  }
   // We don't reference these arrays directly in any of our runtime functions,
   // so we need to prevent them from being dead stripped.
   if (TargetTriple.isOSBinFormatMachO())
     appendToUsed(M, GlobalsToAppendToUsed);
   appendToCompilerUsed(M, GlobalsToAppendToCompilerUsed);
+  return true;
 }
 
 // True if block has successors and it dominates all of them.
@@ -675,11 +518,28 @@ static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT,
   return true;
 }
 
-bool SanitizerCoverage::instrumentFunction(Function &F, const DominatorTree *DT,
-                                           const PostDominatorTree *PDT) {
-  if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
+bool SanitizerCoverageModule::runOnFunction(Function &F) {
+  if (F.empty())
+    return false;
+  if (F.getName().find(".module_ctor") != std::string::npos)
+    return false; // Should not instrument sanitizer init functions.
+  if (F.getName().startswith("__sanitizer_"))
+    return false;  // Don't instrument __sanitizer_* callbacks.
+  // Don't touch available_externally functions, their actual body is elsewhere.
+  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
+    return false;
+  // Don't instrument MSVC CRT configuration helpers. They may run before normal
+  // initialization.
+  if (F.getName() == "__local_stdio_printf_options" ||
+      F.getName() == "__local_stdio_scanf_options")
+    return false;
+  if (isa<UnreachableInst>(F.getEntryBlock().getTerminator()))
     return false;
-  if (!canInstrumentWithSancov(F))
+  // Don't instrument functions using SEH for now. Splitting basic blocks like
+  // we do for coverage breaks WinEHPrepare.
+  // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
+  if (F.hasPersonalityFn() &&
+      isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
     return false;
   if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
     SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
@@ -690,6 +550,10 @@ bool SanitizerCoverage::instrumentFunction(Function &F, const DominatorTree *DT,
   SmallVector<BinaryOperator *, 8> DivTraceTargets;
   SmallVector<GetElementPtrInst *, 8> GepTraceTargets;
 
+  const DominatorTree *DT =
+      &getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+  const PostDominatorTree *PDT =
+      &getAnalysis<PostDominatorTreeWrapperPass>(F).getPostDomTree();
   bool IsLeafFunc = true;
 
   for (auto &BB : F) {
@@ -732,7 +596,7 @@ bool SanitizerCoverage::instrumentFunction(Function &F, const DominatorTree *DT,
   return true;
 }
 
-GlobalVariable *SanitizerCoverage::CreateFunctionLocalArrayInSection(
+GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
     size_t NumElements, Function &F, Type *Ty, const char *Section) {
   ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
   auto Array = new GlobalVariable(
@@ -755,8 +619,8 @@ GlobalVariable *SanitizerCoverage::CreateFunctionLocalArrayInSection(
 }
 
 GlobalVariable *
-SanitizerCoverage::CreatePCArray(Function &F,
-                                 ArrayRef<BasicBlock *> AllBlocks) {
+SanitizerCoverageModule::CreatePCArray(Function &F,
+                                       ArrayRef<BasicBlock *> AllBlocks) {
   size_t N = AllBlocks.size();
   assert(N);
   SmallVector<Constant *, 32> PCs;
@@ -782,7 +646,7 @@ SanitizerCoverage::CreatePCArray(Function &F,
   return PCArray;
 }
 
-void SanitizerCoverage::CreateFunctionLocalArrays(
+void SanitizerCoverageModule::CreateFunctionLocalArrays(
     Function &F, ArrayRef<BasicBlock *> AllBlocks) {
   if (Options.TracePCGuard)
     FunctionGuardArray = CreateFunctionLocalArrayInSection(
@@ -796,9 +660,9 @@ void SanitizerCoverage::CreateFunctionLocalArrays(
     FunctionPCsArray = CreatePCArray(F, AllBlocks);
 }
 
-bool SanitizerCoverage::InjectCoverage(Function &F,
-                                       ArrayRef<BasicBlock *> AllBlocks,
-                                       bool IsLeafFunc) {
+bool SanitizerCoverageModule::InjectCoverage(Function &F,
+                                             ArrayRef<BasicBlock *> AllBlocks,
+                                             bool IsLeafFunc) {
   if (AllBlocks.empty()) return false;
   CreateFunctionLocalArrays(F, AllBlocks);
   for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
@@ -813,7 +677,7 @@ bool SanitizerCoverage::InjectCoverage(Function &F,
 //     The cache is used to speed up recording the caller-callee pairs.
 // The address of the caller is passed implicitly via caller PC.
 // CacheSize is encoded in the name of the run-time function.
-void SanitizerCoverage::InjectCoverageForIndirectCalls(
+void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
     Function &F, ArrayRef<Instruction *> IndirCalls) {
   if (IndirCalls.empty())
     return;
@@ -832,7 +696,7 @@ void SanitizerCoverage::InjectCoverageForIndirectCalls(
 // __sanitizer_cov_trace_switch(CondValue,
 //      {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... })
 
-void SanitizerCoverage::InjectTraceForSwitch(
+void SanitizerCoverageModule::InjectTraceForSwitch(
     Function &, ArrayRef<Instruction *> SwitchTraceTargets) {
   for (auto I : SwitchTraceTargets) {
     if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
@@ -871,7 +735,7 @@ void SanitizerCoverage::InjectTraceForSwitch(
   }
 }
 
-void SanitizerCoverage::InjectTraceForDiv(
+void SanitizerCoverageModule::InjectTraceForDiv(
     Function &, ArrayRef<BinaryOperator *> DivTraceTargets) {
   for (auto BO : DivTraceTargets) {
     IRBuilder<> IRB(BO);
@@ -889,7 +753,7 @@ void SanitizerCoverage::InjectTraceForDiv(
   }
 }
 
-void SanitizerCoverage::InjectTraceForGep(
+void SanitizerCoverageModule::InjectTraceForGep(
     Function &, ArrayRef<GetElementPtrInst *> GepTraceTargets) {
   for (auto GEP : GepTraceTargets) {
     IRBuilder<> IRB(GEP);
@@ -900,7 +764,7 @@ void SanitizerCoverage::InjectTraceForGep(
   }
 }
 
-void SanitizerCoverage::InjectTraceForCmp(
+void SanitizerCoverageModule::InjectTraceForCmp(
     Function &, ArrayRef<Instruction *> CmpTraceTargets) {
   for (auto I : CmpTraceTargets) {
     if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
@@ -935,8 +799,9 @@ void SanitizerCoverage::InjectTraceForCmp(
   }
 }
 
-void SanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
-                                              size_t Idx, bool IsLeafFunc) {
+void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
+                                                    size_t Idx,
+                                                    bool IsLeafFunc) {
   BasicBlock::iterator IP = BB.getFirstInsertionPt();
   bool IsEntryBB = &BB == &F.getEntryBlock();
   DebugLoc EntryLoc;
@@ -993,7 +858,7 @@ void SanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
 }
 
 std::string
-SanitizerCoverage::getSectionName(const std::string &Section) const {
+SanitizerCoverageModule::getSectionName(const std::string &Section) const {
   if (TargetTriple.isOSBinFormatCOFF()) {
     if (Section == SanCovCountersSectionName)
       return ".SCOV$CM";
@@ -1006,25 +871,33 @@ SanitizerCoverage::getSectionName(const std::string &Section) const {
   return "__" + Section;
 }
 
-INITIALIZE_PASS(ModuleSanitizerCoverageLegacyPass, "module-sancov",
-                "Pass for inserting sancov top-level initialization calls",
-                false, false)
+std::string
+SanitizerCoverageModule::getSectionStart(const std::string &Section) const {
+  if (TargetTriple.isOSBinFormatMachO())
+    return "\1section$start$__DATA$__" + Section;
+  return "__start___" + Section;
+}
+
+std::string
+SanitizerCoverageModule::getSectionEnd(const std::string &Section) const {
+  if (TargetTriple.isOSBinFormatMachO())
+    return "\1section$end$__DATA$__" + Section;
+  return "__stop___" + Section;
+}
+
 
-char SanitizerCoverageLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(SanitizerCoverageLegacyPass, "sancov",
-                      "Pass for instrumenting coverage on functions", false,
-                      false)
-INITIALIZE_PASS_DEPENDENCY(ModuleSanitizerCoverageLegacyPass)
+char SanitizerCoverageModule::ID = 0;
+INITIALIZE_PASS_BEGIN(SanitizerCoverageModule, "sancov",
+                      "SanitizerCoverage: TODO."
+                      "ModulePass",
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(SanitizerCoverageLegacyPass, "sancov",
-                    "Pass for instrumenting coverage on functions", false,
-                    false)
-FunctionPass *llvm::createSanitizerCoverageLegacyPassPass(
-    const SanitizerCoverageOptions &Options) {
-  return new SanitizerCoverageLegacyPass(Options);
-}
-ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
+INITIALIZE_PASS_END(SanitizerCoverageModule, "sancov",
+                    "SanitizerCoverage: TODO."
+                    "ModulePass",
+                    false, false)
+ModulePass *llvm::createSanitizerCoverageModulePass(
     const SanitizerCoverageOptions &Options) {
-  return new ModuleSanitizerCoverageLegacyPass(Options);
+  return new SanitizerCoverageModule(Options);
 }
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/abort-in-entry-block.ll b/llvm/test/Instrumentation/SanitizerCoverage/abort-in-entry-block.ll
index 5711669240c6f..9bc8acef481d7 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/abort-in-entry-block.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/abort-in-entry-block.ll
@@ -1,6 +1,5 @@
 ; Checks that a function with no-return in the entry block is not instrumented.
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s
 ; CHECK-NOT: call void @__sanitizer_cov_trace_pc_guard
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/backedge-pruning.ll b/llvm/test/Instrumentation/SanitizerCoverage/backedge-pruning.ll
index 5e9e579e17120..103198311279b 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/backedge-pruning.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/backedge-pruning.ll
@@ -1,8 +1,6 @@
 ; Test -sanitizer-coverage-trace-compares=1 and how it prunes backedge compares.
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=PRUNE
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -sanitizer-coverage-prune-blocks=0 -S | FileCheck %s --check-prefix=NOPRUNE
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=PRUNE
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -sanitizer-coverage-prune-blocks=0 -S | FileCheck %s --check-prefix=NOPRUNE
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/chains.ll b/llvm/test/Instrumentation/SanitizerCoverage/chains.ll
index 7618267069ada..86b109165ee5c 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/chains.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/chains.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=1  -S | FileCheck %s
 
 define i32 @blah(i32) #0 {
   %2 = icmp sgt i32 %0, 1
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll
index 976bc77beb6f2..0f42756fdcb90 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll
@@ -1,6 +1,5 @@
 ; Test -sanitizer-coverage-trace-compares=1 API declarations on a non-x86_64 arch
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
 
 target triple = "i386-unknown-linux-gnu"
 define i32 @foo() #0 {
@@ -8,16 +7,16 @@ entry:
   ret i32 0
 }
 
-; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_indir(i64)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp1(i8, i8)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp2(i16, i16)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp4(i32, i32)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp8(i64, i64)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_div4(i32)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_div8(i64)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_gep(i64)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_switch(i64, i64*)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_pc()
-; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_guard(i32*)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_guard_init(i32*, i32*)
+; CHECK: declare void @__sanitizer_cov_trace_pc_indir(i64)
+; CHECK: declare void @__sanitizer_cov_trace_cmp1(i8, i8)
+; CHECK: declare void @__sanitizer_cov_trace_cmp2(i16, i16)
+; CHECK: declare void @__sanitizer_cov_trace_cmp4(i32, i32)
+; CHECK: declare void @__sanitizer_cov_trace_cmp8(i64, i64)
+; CHECK: declare void @__sanitizer_cov_trace_div4(i32)
+; CHECK: declare void @__sanitizer_cov_trace_div8(i64)
+; CHECK: declare void @__sanitizer_cov_trace_gep(i64)
+; CHECK: declare void @__sanitizer_cov_trace_switch(i64, i64*)
+; CHECK: declare void @__sanitizer_cov_trace_pc()
+; CHECK: declare void @__sanitizer_cov_trace_pc_guard(i32*)
+; CHECK: declare void @__sanitizer_cov_trace_pc_guard_init(i32*, i32*)
 ; CHECK-NOT: declare
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll
index 165bf744432b5..16689f9831d8e 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll
@@ -1,6 +1,5 @@
 ; Test -sanitizer-coverage-trace-compares=1 API declarations on x86_64
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
 
 target triple = "x86_64-unknown-linux-gnu"
 define i32 @foo() #0 {
@@ -8,16 +7,16 @@ entry:
   ret i32 0
 }
 
-; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_indir(i64)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp1(i8 zeroext, i8 zeroext)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp2(i16 zeroext, i16 zeroext)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp4(i32 zeroext, i32 zeroext)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp8(i64, i64)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_div4(i32 zeroext)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_div8(i64)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_gep(i64)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_switch(i64, i64*)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_pc()
-; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_guard(i32*)
-; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_guard_init(i32*, i32*)
+; CHECK: declare void @__sanitizer_cov_trace_pc_indir(i64)
+; CHECK: declare void @__sanitizer_cov_trace_cmp1(i8 zeroext, i8 zeroext)
+; CHECK: declare void @__sanitizer_cov_trace_cmp2(i16 zeroext, i16 zeroext)
+; CHECK: declare void @__sanitizer_cov_trace_cmp4(i32 zeroext, i32 zeroext)
+; CHECK: declare void @__sanitizer_cov_trace_cmp8(i64, i64)
+; CHECK: declare void @__sanitizer_cov_trace_div4(i32 zeroext)
+; CHECK: declare void @__sanitizer_cov_trace_div8(i64)
+; CHECK: declare void @__sanitizer_cov_trace_gep(i64)
+; CHECK: declare void @__sanitizer_cov_trace_switch(i64, i64*)
+; CHECK: declare void @__sanitizer_cov_trace_pc()
+; CHECK: declare void @__sanitizer_cov_trace_pc_guard(i32*)
+; CHECK: declare void @__sanitizer_cov_trace_pc_guard_init(i32*, i32*)
 ; CHECK-NOT: declare
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing.ll
index c599806454d82..fda6f251bc847 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing.ll
@@ -1,6 +1,5 @@
 ; Test -sanitizer-coverage-trace-compares=1
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coff-comdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/coff-comdat.ll
index d6019cb50a4f5..61a9dcd92de21 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/coff-comdat.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/coff-comdat.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s
 
 ; Make sure we use the right comdat groups for COFF to avoid relocations
 ; against discarded sections. Internal linkage functions are also different from
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll b/llvm/test/Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll
index 31a2dd39c6ddc..d81d480009be4 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll
@@ -1,6 +1,5 @@
 ; Checks that the PC and 8-bit Counter Arrays are placed in their own sections in COFF binaries.
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s
 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc19.14.26433"
 
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coff-used-ctor.ll b/llvm/test/Instrumentation/SanitizerCoverage/coff-used-ctor.ll
index f412d2f237d21..fd12eed8e3669 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/coff-used-ctor.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/coff-used-ctor.ll
@@ -1,6 +1,5 @@
 ; Checks that sancov.module_ctor is marked used.
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s
 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc19.14.26433"
 
@@ -9,4 +8,4 @@ entry:
   ret void
 }
 
-; CHECK: @llvm.used = appending global {{.*}} @sancov.module_ctor
+; CHECK: @llvm.used = appending global {{.*}} @sancov.module_ctor
\ No newline at end of file
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll
index 0ee1a339ee521..b61b4eef5df1d 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll
@@ -1,6 +1,5 @@
 ; Test -sanitizer-coverage-trace-compares=1
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll b/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
index 03be088bfd82d..09e23372533f6 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
@@ -1,7 +1,6 @@
 ; Test that coverage instrumentation does not lose debug location.
 
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -S | FileCheck %s
 
 ; C++ source:
 ; 1: struct A {
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll b/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll
index 1e8c69827539b..7b6b5f00442fe 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll
@@ -1,8 +1,6 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc  -S | FileCheck %s --check-prefix=CHECK_TRACE_PC
-; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=CHECKPRUNE
 
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc  -S | FileCheck %s --check-prefix=CHECK_TRACE_PC
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=CHECKPRUNE
+; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=CHECKPRUNE
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll b/llvm/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll
index 428a7d735c5ec..508657a597645 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll
@@ -1,7 +1,6 @@
 ; Test that coverage instrumentation does not lose debug location.
 
 ; RUN: opt < %s -sancov  -sanitizer-coverage-level=2 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=2 -S | FileCheck %s
 
 ; C++ source:
 ; 1: void foo(int *a) {
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/div-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/div-tracing.ll
index e52366707b7f5..0de2ddf68e759 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/div-tracing.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/div-tracing.ll
@@ -1,6 +1,5 @@
 ; Test -sanitizer-coverage-trace-divs=1
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-divs=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-divs=1  -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/gep-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/gep-tracing.ll
index 924c2fe3eb3a5..ac6af4b37202e 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/gep-tracing.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/gep-tracing.ll
@@ -1,6 +1,5 @@
 ; Test -sanitizer-coverage-trace-geps=1
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-geps=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-geps=1  -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll b/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll
index d4e30aab54eab..88141678b6cd4 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll
@@ -1,6 +1,5 @@
 ; Test -sanitizer-coverage-inline-8bit-counters=1
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1  -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/interposable-symbol-nocomdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/interposable-symbol-nocomdat.ll
index e73f1516fa103..c79a2fb5fff0d 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/interposable-symbol-nocomdat.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/interposable-symbol-nocomdat.ll
@@ -1,8 +1,6 @@
 ; Test that interposable symbols do not get put in comdats.
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -mtriple x86_64-linux-gnu -S | FileCheck %s
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -mtriple x86_64-windows-msvc -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -mtriple x86_64-linux-gnu -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -mtriple x86_64-windows-msvc -S | FileCheck %s
 
 define void @Vanilla() {
 entry:
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/no-func.ll b/llvm/test/Instrumentation/SanitizerCoverage/no-func.ll
index 683238c33c81e..ec9e121439176 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/no-func.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/no-func.ll
@@ -1,7 +1,6 @@
 ; Tests that we don't insert __sanitizer_cov_trace_pc_guard_init or some such
 ; when there is no instrumentation.
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/pc-table.ll b/llvm/test/Instrumentation/SanitizerCoverage/pc-table.ll
index f8e2a3015ba70..888277a4c5099 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/pc-table.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/pc-table.ll
@@ -1,8 +1,6 @@
 ; Test -sanitizer-coverage-pc-table=1
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard       -sanitizer-coverage-pc-table=1 -S | FileCheck %s
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-inline-8bit-counters -sanitizer-coverage-pc-table=1 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard       -sanitizer-coverage-pc-table=1 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-inline-8bit-counters -sanitizer-coverage-pc-table=1 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/postdominator_check.ll b/llvm/test/Instrumentation/SanitizerCoverage/postdominator_check.ll
index ebcf3b276c9fb..c50d663eff825 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/postdominator_check.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/postdominator_check.ll
@@ -1,7 +1,5 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=0 -S | FileCheck %s --check-prefix=CHECK_NO_PRUNE
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=0 -S | FileCheck %s --check-prefix=CHECK_NO_PRUNE
 
 define i32 @foo(i32) #0 {
   %2 = icmp sgt i32 %0, 0
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/seh.ll b/llvm/test/Instrumentation/SanitizerCoverage/seh.ll
index b45f1e02ddd70..94d1a2e9acdfb 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/seh.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/seh.ll
@@ -1,9 +1,6 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=0 -S | FileCheck %s
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -S | FileCheck %s
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=0 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=2 -S | FileCheck %s
 
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc18.0.0"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll
index ff14bff1b7f9c..1ad96f82a694a 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll
@@ -2,8 +2,6 @@
 ; user declares `__sancov_lowest_stack` with an unexpected type.
 ; RUN: not opt < %s -sancov -sanitizer-coverage-level=1 \
 ; RUN:         -sanitizer-coverage-stack-depth -S 2>&1 | FileCheck %s
-; RUN: not opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 \
-; RUN:         -sanitizer-coverage-stack-depth -S 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth.ll b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth.ll
index 9deb2f04d789c..0c6db1a922ada 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth.ll
@@ -4,11 +4,6 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 \
 ; RUN:     -sanitizer-coverage-stack-depth -sanitizer-coverage-trace-pc-guard \
 ; RUN:     -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 \
-; RUN:     -sanitizer-coverage-stack-depth -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 \
-; RUN:     -sanitizer-coverage-stack-depth -sanitizer-coverage-trace-pc-guard \
-; RUN:     -S | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/switch-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/switch-tracing.ll
index b109d80ff68d0..debb825db1bce 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/switch-tracing.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/switch-tracing.ll
@@ -1,6 +1,5 @@
 ; Test -sanitizer-coverage-trace-compares=1 (instrumenting a switch)
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1  -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-comdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-comdat.ll
index e6633e1c3ff6a..970ee0d3ac268 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-comdat.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-comdat.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc-guard  -S | FileCheck %s --check-prefix=CHECK_TRACE_PC_GUARD
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc-guard  -S | FileCheck %s --check-prefix=CHECK_TRACE_PC_GUARD
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll
index 06cd192a8296e..d5c9ff451ab6f 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-pc-guard -sanitizer-coverage-inline-8bit-counters -S | FileCheck %s
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-pc-guard -sanitizer-coverage-inline-8bit-counters -S | FileCheck %s
 
 ; Module ctors should have stable names across modules, not something like
 ; @sancov.module_ctor.3 that may cause duplicate ctors after linked together.
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-nocomdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-nocomdat.ll
index 006c662f1440f..1fe1886975e94 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-nocomdat.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-nocomdat.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc-guard  -S | FileCheck %s --check-prefix=CHECK_TRACE_PC_GUARD
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc-guard  -S | FileCheck %s --check-prefix=CHECK_TRACE_PC_GUARD
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.0.0"
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll
index 012a19ba17f1d..baf4dc1e140e1 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll
@@ -2,9 +2,6 @@
 ; RUN: opt < %s -sancov                    -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S  | FileCheck %s
 ; Make sure asan does not instrument __sancov_gen_
 ; RUN: opt < %s -sancov -asan -asan-module -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S  | FileCheck %s
-
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S  | FileCheck %s
-; RUN: opt < %s -passes='module(require<asan-globals-md>,sancov-module,asan-module),function(sancov-func,asan)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S  | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 $Foo = comdat any
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll
index c27fd0eac0bad..7bf8cf7e18e6e 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll
@@ -3,10 +3,6 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S | FileCheck %s --check-prefix=CHECK_PC_GUARD
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=CHECK_PC_GUARD_DARWIN
 
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc  -S | FileCheck %s --check-prefix=CHECK_PC
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S | FileCheck %s --check-prefix=CHECK_PC_GUARD
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard  -S -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=CHECK_PC_GUARD_DARWIN
-
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 define void @foo(i32* %a) sanitize_address {
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/unreachable-critedge.ll b/llvm/test/Instrumentation/SanitizerCoverage/unreachable-critedge.ll
index e3e31086ccb82..ad6cd574d7e0e 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/unreachable-critedge.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/unreachable-critedge.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -S -sancov -sanitizer-coverage-level=3 | FileCheck %s
-; RUN: opt < %s -S -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 | FileCheck %s
 
 ; The critical edges to unreachable_bb should not be split.
 define i32 @foo(i32 %c, i32 %d) {
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/wineh.ll b/llvm/test/Instrumentation/SanitizerCoverage/wineh.ll
index 350242bb3016d..87b44be5544f3 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/wineh.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/wineh.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK
-; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK
 
 ; Generated from this C++ source:
 ; $ clang -O2 t.cpp -S -emit-llvm

From 93dfb93ad68cf2729701d0c3ee66af44076e9f17 Mon Sep 17 00:00:00 2001
From: Eric Christopher <echristo@gmail.com>
Date: Mon, 15 Jul 2019 23:36:02 +0000
Subject: [PATCH 186/451] Temporarily Revert "[SLP] Recommit: Look-ahead
 operand reordering heuristic."

This is being reverted because there are some reported miscompiles with AVX512
and performance regressions in Eigen. The revert was verified with the original
committer, and test cases will be forthcoming.

This reverts commit r364964.

llvm-svn: 366154
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 294 +++---------------
 .../Transforms/SLPVectorizer/X86/lookahead.ll | 208 ++-----------
 2 files changed, 79 insertions(+), 423 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8cd948ee55190..27a86c0bca914 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -147,20 +147,6 @@ static cl::opt<unsigned> MinTreeSize(
     "slp-min-tree-size", cl::init(3), cl::Hidden,
     cl::desc("Only vectorize small trees if they are fully vectorizable"));
 
-// The maximum depth that the look-ahead score heuristic will explore.
-// The higher this value, the higher the compilation time overhead.
-static cl::opt<int> LookAheadMaxDepth(
-    "slp-max-look-ahead-depth", cl::init(2), cl::Hidden,
-    cl::desc("The maximum look-ahead depth for operand reordering scores"));
-
-// The Look-ahead heuristic goes through the users of the bundle to calculate
-// the users cost in getExternalUsesCost(). To avoid compilation time increase
-// we limit the number of users visited to this value.
-static cl::opt<unsigned> LookAheadUsersBudget(
-    "slp-look-ahead-users-budget", cl::init(2), cl::Hidden,
-    cl::desc("The maximum number of users to visit while visiting the "
-             "predecessors. This prevents compilation time increase."));
-
 static cl::opt<bool>
     ViewSLPTree("view-slp-tree", cl::Hidden,
                 cl::desc("Display the SLP trees with Graphviz"));
@@ -722,7 +708,6 @@ class BoUpSLP {
 
     const DataLayout &DL;
     ScalarEvolution &SE;
-    const BoUpSLP &R;
 
     /// \returns the operand data at \p OpIdx and \p Lane.
     OperandData &getData(unsigned OpIdx, unsigned Lane) {
@@ -748,215 +733,6 @@ class BoUpSLP {
       std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
     }
 
-    // The hard-coded scores listed here are not very important. When computing
-    // the scores of matching one sub-tree with another, we are basically
-    // counting the number of values that are matching. So even if all scores
-    // are set to 1, we would still get a decent matching result.
-    // However, sometimes we have to break ties. For example we may have to
-    // choose between matching loads vs matching opcodes. This is what these
-    // scores are helping us with: they provide the order of preference.
-
-    /// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
-    static const int ScoreConsecutiveLoads = 3;
-    /// Constants.
-    static const int ScoreConstants = 2;
-    /// Instructions with the same opcode.
-    static const int ScoreSameOpcode = 2;
-    /// Instructions with alt opcodes (e.g, add + sub).
-    static const int ScoreAltOpcodes = 1;
-    /// Identical instructions (a.k.a. splat or broadcast).
-    static const int ScoreSplat = 1;
-    /// Matching with an undef is preferable to failing.
-    static const int ScoreUndef = 1;
-    /// Score for failing to find a decent match.
-    static const int ScoreFail = 0;
-    /// User exteranl to the vectorized code.
-    static const int ExternalUseCost = 1;
-    /// The user is internal but in a different lane.
-    static const int UserInDiffLaneCost = ExternalUseCost;
-
-    /// \returns the score of placing \p V1 and \p V2 in consecutive lanes.
-    static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
-                               ScalarEvolution &SE) {
-      auto *LI1 = dyn_cast<LoadInst>(V1);
-      auto *LI2 = dyn_cast<LoadInst>(V2);
-      if (LI1 && LI2)
-        return isConsecutiveAccess(LI1, LI2, DL, SE)
-                   ? VLOperands::ScoreConsecutiveLoads
-                   : VLOperands::ScoreFail;
-
-      auto *C1 = dyn_cast<Constant>(V1);
-      auto *C2 = dyn_cast<Constant>(V2);
-      if (C1 && C2)
-        return VLOperands::ScoreConstants;
-
-      auto *I1 = dyn_cast<Instruction>(V1);
-      auto *I2 = dyn_cast<Instruction>(V2);
-      if (I1 && I2) {
-        if (I1 == I2)
-          return VLOperands::ScoreSplat;
-        InstructionsState S = getSameOpcode({I1, I2});
-        // Note: Only consider instructions with <= 2 operands to avoid
-        // complexity explosion.
-        if (S.getOpcode() && S.MainOp->getNumOperands() <= 2)
-          return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes
-                                  : VLOperands::ScoreSameOpcode;
-      }
-
-      if (isa<UndefValue>(V2))
-        return VLOperands::ScoreUndef;
-
-      return VLOperands::ScoreFail;
-    }
-
-    /// Holds the values and their lane that are taking part in the look-ahead
-    /// score calculation. This is used in the external uses cost calculation.
-    SmallDenseMap<Value *, int> InLookAheadValues;
-
-    /// \Returns the additinal cost due to uses of \p LHS and \p RHS that are
-    /// either external to the vectorized code, or require shuffling.
-    int getExternalUsesCost(const std::pair<Value *, int> &LHS,
-                            const std::pair<Value *, int> &RHS) {
-      int Cost = 0;
-      SmallVector<std::pair<Value *, int>, 2> Values = {LHS, RHS};
-      for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) {
-        Value *V = Values[Idx].first;
-        // Calculate the absolute lane, using the minimum relative lane of LHS
-        // and RHS as base and Idx as the offset.
-        int Ln = std::min(LHS.second, RHS.second) + Idx;
-        assert(Ln >= 0 && "Bad lane calculation");
-        unsigned UsersBudget = LookAheadUsersBudget;
-        for (User *U : V->users()) {
-          if (const TreeEntry *UserTE = R.getTreeEntry(U)) {
-            // The user is in the VectorizableTree. Check if we need to insert.
-            auto It = llvm::find(UserTE->Scalars, U);
-            assert(It != UserTE->Scalars.end() && "U is in UserTE");
-            int UserLn = std::distance(UserTE->Scalars.begin(), It);
-            assert(UserLn >= 0 && "Bad lane");
-            if (UserLn != Ln)
-              Cost += UserInDiffLaneCost;
-          } else {
-            // Check if the user is in the look-ahead code.
-            auto It2 = InLookAheadValues.find(U);
-            if (It2 != InLookAheadValues.end()) {
-              // The user is in the look-ahead code. Check the lane.
-              if (It2->second != Ln)
-                Cost += UserInDiffLaneCost;
-            } else {
-              // The user is neither in SLP tree nor in the look-ahead code.
-              Cost += ExternalUseCost;
-            }
-          }
-          // Limit the number of visited uses to cap compilation time.
-          if (--UsersBudget == 0)
-            break;
-        }
-      }
-      return Cost;
-    }
-
-    /// Go through the operands of \p LHS and \p RHS recursively until \p
-    /// MaxLevel, and return the cummulative score. For example:
-    /// \verbatim
-    ///  A[0]  B[0]  A[1]  B[1]  C[0] D[0]  B[1] A[1]
-    ///     \ /         \ /         \ /        \ /
-    ///      +           +           +          +
-    ///     G1          G2          G3         G4
-    /// \endverbatim
-    /// The getScoreAtLevelRec(G1, G2) function will try to match the nodes at
-    /// each level recursively, accumulating the score. It starts from matching
-    /// the additions at level 0, then moves on to the loads (level 1). The
-    /// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and
-    /// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while
-    /// {A[0],C[0]} has a score of VLOperands::ScoreFail.
-    /// Please note that the order of the operands does not matter, as we
-    /// evaluate the score of all profitable combinations of operands. In
-    /// other words the score of G1 and G4 is the same as G1 and G2. This
-    /// heuristic is based on ideas described in:
-    ///   Look-ahead SLP: Auto-vectorization in the presence of commutative
-    ///   operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
-    ///   Luís F. W. Góes
-    int getScoreAtLevelRec(const std::pair<Value *, int> &LHS,
-                           const std::pair<Value *, int> &RHS, int CurrLevel,
-                           int MaxLevel) {
-
-      Value *V1 = LHS.first;
-      Value *V2 = RHS.first;
-      // Get the shallow score of V1 and V2.
-      int ShallowScoreAtThisLevel =
-          std::max((int)ScoreFail, getShallowScore(V1, V2, DL, SE) -
-                                       getExternalUsesCost(LHS, RHS));
-      int Lane1 = LHS.second;
-      int Lane2 = RHS.second;
-
-      // If reached MaxLevel,
-      //  or if V1 and V2 are not instructions,
-      //  or if they are SPLAT,
-      //  or if they are not consecutive, early return the current cost.
-      auto *I1 = dyn_cast<Instruction>(V1);
-      auto *I2 = dyn_cast<Instruction>(V2);
-      if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
-          ShallowScoreAtThisLevel == VLOperands::ScoreFail ||
-          (isa<LoadInst>(I1) && isa<LoadInst>(I2) && ShallowScoreAtThisLevel))
-        return ShallowScoreAtThisLevel;
-      assert(I1 && I2 && "Should have early exited.");
-
-      // Keep track of in-tree values for determining the external-use cost.
-      InLookAheadValues[V1] = Lane1;
-      InLookAheadValues[V2] = Lane2;
-
-      // Contains the I2 operand indexes that got matched with I1 operands.
-      SmallSet<unsigned, 4> Op2Used;
-
-      // Recursion towards the operands of I1 and I2. We are trying all possbile
-      // operand pairs, and keeping track of the best score.
-      for (unsigned OpIdx1 = 0, NumOperands1 = I1->getNumOperands();
-           OpIdx1 != NumOperands1; ++OpIdx1) {
-        // Try to pair op1I with the best operand of I2.
-        int MaxTmpScore = 0;
-        unsigned MaxOpIdx2 = 0;
-        bool FoundBest = false;
-        // If I2 is commutative try all combinations.
-        unsigned FromIdx = isCommutative(I2) ? 0 : OpIdx1;
-        unsigned ToIdx = isCommutative(I2)
-                             ? I2->getNumOperands()
-                             : std::min(I2->getNumOperands(), OpIdx1 + 1);
-        assert(FromIdx <= ToIdx && "Bad index");
-        for (unsigned OpIdx2 = FromIdx; OpIdx2 != ToIdx; ++OpIdx2) {
-          // Skip operands already paired with OpIdx1.
-          if (Op2Used.count(OpIdx2))
-            continue;
-          // Recursively calculate the cost at each level
-          int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1},
-                                            {I2->getOperand(OpIdx2), Lane2},
-                                            CurrLevel + 1, MaxLevel);
-          // Look for the best score.
-          if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) {
-            MaxTmpScore = TmpScore;
-            MaxOpIdx2 = OpIdx2;
-            FoundBest = true;
-          }
-        }
-        if (FoundBest) {
-          // Pair {OpIdx1, MaxOpIdx2} was found to be best. Never revisit it.
-          Op2Used.insert(MaxOpIdx2);
-          ShallowScoreAtThisLevel += MaxTmpScore;
-        }
-      }
-      return ShallowScoreAtThisLevel;
-    }
-
-    /// \Returns the look-ahead score, which tells us how much the sub-trees
-    /// rooted at \p LHS and \p RHS match, the more they match the higher the
-    /// score. This helps break ties in an informed way when we cannot decide on
-    /// the order of the operands by just considering the immediate
-    /// predecessors.
-    int getLookAheadScore(const std::pair<Value *, int> &LHS,
-                          const std::pair<Value *, int> &RHS) {
-      InLookAheadValues.clear();
-      return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth);
-    }
-
     // Search all operands in Ops[*][Lane] for the one that matches best
     // Ops[OpIdx][LastLane] and return its opreand index.
     // If no good match can be found, return None.
@@ -974,6 +750,9 @@ class BoUpSLP {
       // The linearized opcode of the operand at OpIdx, Lane.
       bool OpIdxAPO = getData(OpIdx, Lane).APO;
 
+      const unsigned BestScore = 2;
+      const unsigned GoodScore = 1;
+
       // The best operand index and its score.
       // Sometimes we have more than one option (e.g., Opcode and Undefs), so we
       // are using the score to differentiate between the two.
@@ -1002,19 +781,41 @@ class BoUpSLP {
         // Look for an operand that matches the current mode.
         switch (RMode) {
         case ReorderingMode::Load:
+          if (isa<LoadInst>(Op)) {
+            // Figure out which is left and right, so that we can check for
+            // consecutive loads
+            bool LeftToRight = Lane > LastLane;
+            Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
+            Value *OpRight = (LeftToRight) ? Op : OpLastLane;
+            if (isConsecutiveAccess(cast<LoadInst>(OpLeft),
+                                    cast<LoadInst>(OpRight), DL, SE))
+              BestOp.Idx = Idx;
+          }
+          break;
+        case ReorderingMode::Opcode:
+          // We accept both Instructions and Undefs, but with different scores.
+          if ((isa<Instruction>(Op) && isa<Instruction>(OpLastLane) &&
+               cast<Instruction>(Op)->getOpcode() ==
+                   cast<Instruction>(OpLastLane)->getOpcode()) ||
+              (isa<UndefValue>(OpLastLane) && isa<Instruction>(Op)) ||
+              isa<UndefValue>(Op)) {
+            // An instruction has a higher score than an undef.
+            unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+            if (Score > BestOp.Score) {
+              BestOp.Idx = Idx;
+              BestOp.Score = Score;
+            }
+          }
+          break;
         case ReorderingMode::Constant:
-        case ReorderingMode::Opcode: {
-          bool LeftToRight = Lane > LastLane;
-          Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
-          Value *OpRight = (LeftToRight) ? Op : OpLastLane;
-          unsigned Score =
-              getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane});
-          if (Score > BestOp.Score) {
-            BestOp.Idx = Idx;
-            BestOp.Score = Score;
+          if (isa<Constant>(Op)) {
+            unsigned Score = (isa<UndefValue>(Op)) ? GoodScore : BestScore;
+            if (Score > BestOp.Score) {
+              BestOp.Idx = Idx;
+              BestOp.Score = Score;
+            }
           }
           break;
-        }
         case ReorderingMode::Splat:
           if (Op == OpLastLane)
             BestOp.Idx = Idx;
@@ -1145,8 +946,8 @@ class BoUpSLP {
   public:
     /// Initialize with all the operands of the instruction vector \p RootVL.
     VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
-               ScalarEvolution &SE, const BoUpSLP &R)
-        : DL(DL), SE(SE), R(R) {
+               ScalarEvolution &SE)
+        : DL(DL), SE(SE) {
       // Append all the operands of RootVL.
       appendOperandsOfVL(RootVL);
     }
@@ -1368,8 +1169,7 @@ class BoUpSLP {
                                              SmallVectorImpl<Value *> &Left,
                                              SmallVectorImpl<Value *> &Right,
                                              const DataLayout &DL,
-                                             ScalarEvolution &SE,
-                                             const BoUpSLP &R);
+                                             ScalarEvolution &SE);
   struct TreeEntry {
     using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
     TreeEntry(VecTreeTy &Container) : Container(Container) {}
@@ -2571,7 +2371,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         // Commutative predicate - collect + sort operands of the instructions
         // so that each side is more likely to have the same opcode.
         assert(P0 == SwapP0 && "Commutative Predicate mismatch");
-        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
       } else {
         // Collect operands - commute if it uses the swapped predicate.
         for (Value *V : VL) {
@@ -2616,7 +2416,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       // have the same opcode.
       if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
         ValueList Left, Right;
-        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
         buildTree_rec(Left, Depth + 1, {TE, 0});
         buildTree_rec(Right, Depth + 1, {TE, 1});
         return;
@@ -2785,7 +2585,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       // Reorder operands if reordering would enable vectorization.
       if (isa<BinaryOperator>(VL0)) {
         ValueList Left, Right;
-        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
         buildTree_rec(Left, Depth + 1, {TE, 0});
         buildTree_rec(Right, Depth + 1, {TE, 1});
         return;
@@ -3506,15 +3306,13 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
 
 // Perform operand reordering on the instructions in VL and return the reordered
 // operands in Left and Right.
-void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
-                                             SmallVectorImpl<Value *> &Left,
-                                             SmallVectorImpl<Value *> &Right,
-                                             const DataLayout &DL,
-                                             ScalarEvolution &SE,
-                                             const BoUpSLP &R) {
+void BoUpSLP::reorderInputsAccordingToOpcode(
+    ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
+    SmallVectorImpl<Value *> &Right, const DataLayout &DL,
+    ScalarEvolution &SE) {
   if (VL.empty())
     return;
-  VLOperands Ops(VL, DL, SE, R);
+  VLOperands Ops(VL, DL, SE);
   // Reorder the operands in place.
   Ops.reorder();
   Left = Ops.getVL(0);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index f9cbf2ff01c1c..f89cae88a5fbc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -27,19 +27,22 @@ define void @lookahead_basic(double* %array) {
 ; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
 ; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
 ; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX4]] to <2 x double>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX6]] to <2 x double>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; CHECK-NEXT:    [[A_0:%.*]] = load double, double* [[IDX0]], align 8
+; CHECK-NEXT:    [[A_1:%.*]] = load double, double* [[IDX1]], align 8
+; CHECK-NEXT:    [[B_0:%.*]] = load double, double* [[IDX2]], align 8
+; CHECK-NEXT:    [[B_1:%.*]] = load double, double* [[IDX3]], align 8
+; CHECK-NEXT:    [[C_0:%.*]] = load double, double* [[IDX4]], align 8
+; CHECK-NEXT:    [[C_1:%.*]] = load double, double* [[IDX5]], align 8
+; CHECK-NEXT:    [[D_0:%.*]] = load double, double* [[IDX6]], align 8
+; CHECK-NEXT:    [[D_1:%.*]] = load double, double* [[IDX7]], align 8
+; CHECK-NEXT:    [[SUBAB_0:%.*]] = fsub fast double [[A_0]], [[B_0]]
+; CHECK-NEXT:    [[SUBCD_0:%.*]] = fsub fast double [[C_0]], [[D_0]]
+; CHECK-NEXT:    [[SUBAB_1:%.*]] = fsub fast double [[A_1]], [[B_1]]
+; CHECK-NEXT:    [[SUBCD_1:%.*]] = fsub fast double [[C_1]], [[D_1]]
+; CHECK-NEXT:    [[ADDABCD_0:%.*]] = fadd fast double [[SUBAB_0]], [[SUBCD_0]]
+; CHECK-NEXT:    [[ADDCDAB_1:%.*]] = fadd fast double [[SUBCD_1]], [[SUBAB_1]]
+; CHECK-NEXT:    store double [[ADDABCD_0]], double* [[IDX0]], align 8
+; CHECK-NEXT:    store double [[ADDCDAB_1]], double* [[IDX1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -161,23 +164,22 @@ define void @lookahead_alt2(double* %array) {
 ; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
 ; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
 ; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[IDX4]] to <2 x double>*
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDX6]] to <2 x double>*
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP14:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP10]]
-; CHECK-NEXT:    [[TMP15:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP14]], <2 x double>* [[TMP15]], align 8
+; CHECK-NEXT:    [[A_0:%.*]] = load double, double* [[IDX0]], align 8
+; CHECK-NEXT:    [[A_1:%.*]] = load double, double* [[IDX1]], align 8
+; CHECK-NEXT:    [[B_0:%.*]] = load double, double* [[IDX2]], align 8
+; CHECK-NEXT:    [[B_1:%.*]] = load double, double* [[IDX3]], align 8
+; CHECK-NEXT:    [[C_0:%.*]] = load double, double* [[IDX4]], align 8
+; CHECK-NEXT:    [[C_1:%.*]] = load double, double* [[IDX5]], align 8
+; CHECK-NEXT:    [[D_0:%.*]] = load double, double* [[IDX6]], align 8
+; CHECK-NEXT:    [[D_1:%.*]] = load double, double* [[IDX7]], align 8
+; CHECK-NEXT:    [[ADDAB_0:%.*]] = fadd fast double [[A_0]], [[B_0]]
+; CHECK-NEXT:    [[SUBCD_0:%.*]] = fsub fast double [[C_0]], [[D_0]]
+; CHECK-NEXT:    [[ADDCD_1:%.*]] = fadd fast double [[C_1]], [[D_1]]
+; CHECK-NEXT:    [[SUBAB_1:%.*]] = fsub fast double [[A_1]], [[B_1]]
+; CHECK-NEXT:    [[ADDABCD_0:%.*]] = fadd fast double [[ADDAB_0]], [[SUBCD_0]]
+; CHECK-NEXT:    [[ADDCDAB_1:%.*]] = fadd fast double [[ADDCD_1]], [[SUBAB_1]]
+; CHECK-NEXT:    store double [[ADDABCD_0]], double* [[IDX0]], align 8
+; CHECK-NEXT:    store double [[ADDCDAB_1]], double* [[IDX1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -237,97 +239,6 @@ define void @lookahead_external_uses(double* %A, double *%B, double *%C, double
 ; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
 ; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
 ; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
-; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
-; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
-; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
-; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
-; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
-; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[A1]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[B2]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A2]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP1]]
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]]
-; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
-; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
-; CHECK-NEXT:    store double [[A1]], double* [[EXT1:%.*]], align 8
-; CHECK-NEXT:    ret void
-;
-entry:
-  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
-  %IdxB0 = getelementptr inbounds double, double* %B, i64 0
-  %IdxC0 = getelementptr inbounds double, double* %C, i64 0
-  %IdxD0 = getelementptr inbounds double, double* %D, i64 0
-
-  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
-  %IdxB2 = getelementptr inbounds double, double* %B, i64 2
-  %IdxA2 = getelementptr inbounds double, double* %A, i64 2
-  %IdxB1 = getelementptr inbounds double, double* %B, i64 1
-
-  %A0 = load double, double *%IdxA0, align 8
-  %B0 = load double, double *%IdxB0, align 8
-  %C0 = load double, double *%IdxC0, align 8
-  %D0 = load double, double *%IdxD0, align 8
-
-  %A1 = load double, double *%IdxA1, align 8
-  %B2 = load double, double *%IdxB2, align 8
-  %A2 = load double, double *%IdxA2, align 8
-  %B1 = load double, double *%IdxB1, align 8
-
-  %subA0B0 = fsub fast double %A0, %B0
-  %subC0D0 = fsub fast double %C0, %D0
-
-  %subA1B2 = fsub fast double %A1, %B2
-  %subA2B1 = fsub fast double %A2, %B1
-
-  %add0 = fadd fast double %subA0B0, %subC0D0
-  %add1 = fadd fast double %subA1B2, %subA2B1
-
-  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
-  %IdxS1 = getelementptr inbounds double, double* %S, i64 1
-
-  store double %add0, double *%IdxS0, align 8
-  store double %add1, double *%IdxS1, align 8
-
-  ; External use
-  store double %A1, double *%Ext1, align 8
-  ret void
-}
-
-; A[0] B[0] C[0] D[0]  A[1] B[2] A[2] B[1]
-;     \  /   \  /       /  \  /   \  / \
-;       -     -    U1,U2,U3  -     -  U4,U5
-;        \   /                \   /
-;          +                    +
-;          |                    |
-;         S[0]                 S[1]
-;
-;
-; If we limit the users budget for the look-ahead heuristic to 2, then the
-; look-ahead heuristic has no way of choosing B[1] (with 2 external users)
-; over A[1] (with 3 external users).
-; The result is that the operands are of the Add not reordered and the loads
-; from A get vectorized instead of the loads from B.
-;
-define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2, double *%Ext3, double *%Ext4, double *%Ext5) {
-; CHECK-LABEL: @lookahead_limit_users_budget(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
-; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
-; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
-; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
-; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
-; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
 ; CHECK-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
 ; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
 ; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
@@ -351,10 +262,6 @@ define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, do
 ; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
 ; CHECK-NEXT:    store double [[TMP12]], double* [[EXT1:%.*]], align 8
-; CHECK-NEXT:    store double [[TMP12]], double* [[EXT2:%.*]], align 8
-; CHECK-NEXT:    store double [[TMP12]], double* [[EXT3:%.*]], align 8
-; CHECK-NEXT:    store double [[B1]], double* [[EXT4:%.*]], align 8
-; CHECK-NEXT:    store double [[B1]], double* [[EXT5:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -393,56 +300,7 @@ entry:
   store double %add0, double *%IdxS0, align 8
   store double %add1, double *%IdxS1, align 8
 
-  ; External uses of A1
+  ; External use
   store double %A1, double *%Ext1, align 8
-  store double %A1, double *%Ext2, align 8
-  store double %A1, double *%Ext3, align 8
-
-  ; External uses of B1
-  store double %B1, double *%Ext4, align 8
-  store double %B1, double *%Ext5, align 8
-
-  ret void
-}
-
-; This checks that the lookahead code does not crash when instructions with the same opcodes have different numbers of operands (in this case the calls).
-
-%Class = type { i8 }
-declare double @_ZN1i2ayEv(%Class*)
-declare double @_ZN1i2axEv()
-
-define void @lookahead_crash(double* %A, double *%S, %Class *%Arg0) {
-; CHECK-LABEL: @lookahead_crash(
-; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
-; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
-; CHECK-NEXT:    [[C0:%.*]] = call double @_ZN1i2ayEv(%Class* [[ARG0:%.*]])
-; CHECK-NEXT:    [[C1:%.*]] = call double @_ZN1i2axEv()
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[C1]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
-; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
-; CHECK-NEXT:    ret void
-;
-  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
-  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
-
-  %A0 = load double, double *%IdxA0, align 8
-  %A1 = load double, double *%IdxA1, align 8
-
-  %C0 = call double @_ZN1i2ayEv(%Class *%Arg0)
-  %C1 = call double @_ZN1i2axEv()
-
-  %add0 = fadd fast double %A0, %C0
-  %add1 = fadd fast double %A1, %C1
-
-  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
-  %IdxS1 = getelementptr inbounds double, double* %S, i64 1
-  store double %add0, double *%IdxS0, align 8
-  store double %add1, double *%IdxS1, align 8
   ret void
 }

From fdcbd5fa48680a1f02809d2ead6259b30b00d0b1 Mon Sep 17 00:00:00 2001
From: Eric Christopher <echristo@gmail.com>
Date: Mon, 15 Jul 2019 23:49:31 +0000
Subject: [PATCH 187/451] Temporarily Revert "fix unnamed fiefield issue and
 add tests for __builtin_preserve_access_index intrinsic"

The commit had tests that would only work with names in the IR.

This reverts commit r366076.

llvm-svn: 366155
---
 clang/lib/CodeGen/CGExpr.cpp                  |  21 +--
 clang/lib/CodeGen/CodeGenFunction.h           |   3 -
 .../CodeGen/builtin-preserve-access-index.c   | 177 ------------------
 .../test/Sema/builtin-preserve-access-index.c |  13 --
 4 files changed, 2 insertions(+), 212 deletions(-)
 delete mode 100644 clang/test/CodeGen/builtin-preserve-access-index.c
 delete mode 100644 clang/test/Sema/builtin-preserve-access-index.c

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 25d2424eb85ad..62d930ca8c455 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3892,23 +3892,6 @@ LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) {
   return EmitLValueForField(LambdaLV, Field);
 }
 
-/// Get the field index in the debug info. The debug info structure/union
-/// will ignore the unnamed bitfields.
-unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec,
-                                             unsigned FieldIndex) {
-  unsigned I = 0, Skipped = 0;
-
-  for (auto F : Rec->getDefinition()->fields()) {
-    if (I == FieldIndex)
-      break;
-    if (F->isUnnamedBitfield())
-      Skipped++;
-    I++;
-  }
-
-  return FieldIndex - Skipped;
-}
-
 /// Get the address of a zero-sized field within a record. The resulting
 /// address doesn't necessarily have the right type.
 static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base,
@@ -3948,7 +3931,7 @@ static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base,
       CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
 
   return CGF.Builder.CreatePreserveStructAccessIndex(
-      base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo);
+      base, idx, field->getFieldIndex(), DbgInfo);
 }
 
 static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
@@ -4065,7 +4048,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
           getContext().getRecordType(rec), rec->getLocation());
       addr = Address(
           Builder.CreatePreserveUnionAccessIndex(
-              addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo),
+              addr.getPointer(), field->getFieldIndex(), DbgInfo),
           addr.getAlignment());
     }
   } else {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 0d534af42cddb..a51a9711ff170 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2652,9 +2652,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Converts Location to a DebugLoc, if debug information is enabled.
   llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location);
 
-  /// Get the record field index as represented in debug info.
-  unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
-
 
   //===--------------------------------------------------------------------===//
   //                            Declaration Emission
diff --git a/clang/test/CodeGen/builtin-preserve-access-index.c b/clang/test/CodeGen/builtin-preserve-access-index.c
deleted file mode 100644
index c56c6ecc0e566..0000000000000
--- a/clang/test/CodeGen/builtin-preserve-access-index.c
+++ /dev/null
@@ -1,177 +0,0 @@
-// RUN: %clang -target x86_64 -emit-llvm -S -g %s -o - | FileCheck %s
-
-#define _(x) (__builtin_preserve_access_index(x))
-
-const void *unit1(const void *arg) {
-  return _(arg);
-}
-// CHECK: define dso_local i8* @unit1(i8* %arg)
-// CHECK-NOT: llvm.preserve.array.access.index
-// CHECK-NOT: llvm.preserve.struct.access.index
-// CHECK-NOT: llvm.preserve.union.access.index
-
-const void *unit2(void) {
-  return _((const void *)0xffffffffFFFF0000ULL);
-}
-// CHECK: define dso_local i8* @unit2()
-// CHECK-NOT: llvm.preserve.array.access.index
-// CHECK-NOT: llvm.preserve.struct.access.index
-// CHECK-NOT: llvm.preserve.union.access.index
-
-const void *unit3(const int *arg) {
-  return _(arg + 1);
-}
-// CHECK: define dso_local i8* @unit3(i32* %arg)
-// CHECK-NOT: llvm.preserve.array.access.index
-// CHECK-NOT: llvm.preserve.struct.access.index
-// CHECK-NOT: llvm.preserve.union.access.index
-
-const void *unit4(const int *arg) {
-  return _(&arg[1]);
-}
-// CHECK: define dso_local i8* @unit4(i32* %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %0, i32 0, i32 1)
-
-const void *unit5(const int *arg[5]) {
-  return _(&arg[1][2]);
-}
-// CHECK: define dso_local i8* @unit5(i32** %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call i32** @llvm.preserve.array.access.index.p0p0i32.p0p0i32(i32** %0, i32 0, i32 1)
-// CHECK-NOT: getelementptr
-// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %2, i32 0, i32 2)
-
-struct s1 {
-  char a;
-  int b;
-};
-
-struct s2 {
-  char a1:1;
-  char a2:1;
-  int b;
-};
-
-struct s3 {
-  char a1:1;
-  char a2:1;
-  char :6;
-  int b;
-};
-
-const void *unit6(struct s1 *arg) {
-  return _(&arg->a);
-}
-// CHECK: define dso_local i8* @unit6(%struct.s1* %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call i8* @llvm.preserve.struct.access.index.p0i8.p0s_struct.s1s(%struct.s1* %0, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1:[0-9]+]]
-
-const void *unit7(struct s1 *arg) {
-  return _(&arg->b);
-}
-// CHECK: define dso_local i8* @unit7(%struct.s1* %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s1s(%struct.s1* %0, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1]]
-
-const void *unit8(struct s2 *arg) {
-  return _(&arg->b);
-}
-// CHECK: define dso_local i8* @unit8(%struct.s2* %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s2s(%struct.s2* %0, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S2:[0-9]+]]
-
-const void *unit9(struct s3 *arg) {
-  return _(&arg->b);
-}
-// CHECK: define dso_local i8* @unit9(%struct.s3* %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s3s(%struct.s3* %0, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S3:[0-9]+]]
-
-union u1 {
-  char a;
-  int b;
-};
-
-union u2 {
-  char a;
-  int :32;
-  int b;
-};
-
-const void *unit10(union u1 *arg) {
-  return _(&arg->a);
-}
-// CHECK: define dso_local i8* @unit10(%union.u1* %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1:[0-9]+]]
-
-const void *unit11(union u1 *arg) {
-  return _(&arg->b);
-}
-// CHECK: define dso_local i8* @unit11(%union.u1* %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %0, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1]]
-
-const void *unit12(union u2 *arg) {
-  return _(&arg->b);
-}
-// CHECK: define dso_local i8* @unit12(%union.u2* %arg)
-// CHECK-NOT: getelementptr
-// CHECK: call %union.u2* @llvm.preserve.union.access.index.p0s_union.u2s.p0s_union.u2s(%union.u2* %0, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U2:[0-9]+]]
-
-struct s4 {
-  char d;
-  union u {
-    int b[4];
-    char a;
-  } c;
-};
-
-union u3 {
-  struct s {
-    int b[4];
-  } c;
-  char a;
-};
-
-const void *unit13(struct s4 *arg) {
-  return _(&arg->c.b[2]);
-}
-// CHECK: define dso_local i8* @unit13(%struct.s4* %arg)
-// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %0, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4:[0-9]+]]
-// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %1, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U:[0-9]+]]
-// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %b, i32 1, i32 2)
-
-const void *unit14(union u3 *arg) {
-  return _(&arg->c.b[2]);
-}
-// CHECK: define dso_local i8* @unit14(%union.u3* %arg)
-// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3:[0-9]+]]
-// CHECK: call [4 x i32]* @llvm.preserve.struct.access.index.p0a4i32.p0s_struct.ss(%struct.s* %c, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_I_S:[0-9]+]]
-// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %2, i32 1, i32 2)
-
-const void *unit15(struct s4 *arg) {
-  return _(&arg[2].c.a);
-}
-// CHECK: define dso_local i8* @unit15(%struct.s4* %arg)
-// CHECK: call %struct.s4* @llvm.preserve.array.access.index.p0s_struct.s4s.p0s_struct.s4s(%struct.s4* %0, i32 0, i32 2)
-// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %1, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4]]
-// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %2, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U]]
-
-const void *unit16(union u3 *arg) {
-  return _(&arg[2].a);
-}
-// CHECK: define dso_local i8* @unit16(%union.u3* %arg)
-// CHECK: call %union.u3* @llvm.preserve.array.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %0, i32 0, i32 2)
-// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3]]
-
-// CHECK: ![[STRUCT_S1]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s1",
-// CHECK: ![[STRUCT_S2]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s2",
-// CHECK: ![[STRUCT_S3]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s3",
-// CHECK: ![[UNION_U1]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u1",
-// CHECK: ![[UNION_U2]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u2",
-// CHECK: ![[STRUCT_S4]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s4",
-// CHECK: ![[UNION_I_U]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u",
-// CHECK: ![[UNION_U3]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u3",
-// CHECK: ![[STRUCT_I_S]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s",
diff --git a/clang/test/Sema/builtin-preserve-access-index.c b/clang/test/Sema/builtin-preserve-access-index.c
deleted file mode 100644
index c10ceb5145b8c..0000000000000
--- a/clang/test/Sema/builtin-preserve-access-index.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: %clang_cc1 -x c -triple x86_64-pc-linux-gnu -dwarf-version=4 -fsyntax-only -verify %s
-
-const void *invalid1(const int *arg) {
-  return __builtin_preserve_access_index(&arg[1], 1); // expected-error {{too many arguments to function call, expected 1, have 2}}
-}
-
-void *invalid2(const int *arg) {
-  return __builtin_preserve_access_index(&arg[1]); // expected-warning {{returning 'const void *' from a function with result type 'void *' discards qualifiers}}
-}
-
-const void *invalid3(const int *arg) {
-  return __builtin_preserve_access_index(1); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const void *'}}
-}

From c245249b7bd012b644c8756abf49157348b3fb15 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda@apple.com>
Date: Mon, 15 Jul 2019 23:55:22 +0000
Subject: [PATCH 188/451] Update some file changes, but there's a dependency
 loop so it doesn't quite work right now.

llvm-svn: 366156
---
 lldb/lldb.xcodeproj/project.pbxproj | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/lldb/lldb.xcodeproj/project.pbxproj b/lldb/lldb.xcodeproj/project.pbxproj
index 11b3b3ea87ed5..c02abef66a753 100644
--- a/lldb/lldb.xcodeproj/project.pbxproj
+++ b/lldb/lldb.xcodeproj/project.pbxproj
@@ -162,7 +162,7 @@
 		2689007913353E1A00698AC0 /* CFCMutableDictionary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26BC7EF310F1B8AD00F91463 /* CFCMutableDictionary.cpp */; };
 		2689007A13353E1A00698AC0 /* CFCMutableSet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26BC7EF510F1B8AD00F91463 /* CFCMutableSet.cpp */; };
 		2689007B13353E1A00698AC0 /* CFCString.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26BC7EF810F1B8AD00F91463 /* CFCString.cpp */; };
-		268900E913353E6F00698AC0 /* CPPLanguageRuntime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CB443BC1249920C00C13DC2 /* CPPLanguageRuntime.cpp */; };
+		AF9E360C22DD3BFC000B7776 /* CPPLanguageRuntime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AF9E360B22DD3BFB000B7776 /* CPPLanguageRuntime.cpp */; };
 		94B6385D1B8FB178004FE1E4 /* CPlusPlusLanguage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 94B6385B1B8FB174004FE1E4 /* CPlusPlusLanguage.cpp */; };
 		23CB15341D66DA9300EDDDE1 /* CPlusPlusLanguageTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 23CB14FA1D66CCF100EDDDE1 /* CPlusPlusLanguageTest.cpp */; };
 		49F811F31E931B2100F4E163 /* CPlusPlusNameParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49F811EF1E931B1500F4E163 /* CPlusPlusNameParser.cpp */; };
@@ -379,7 +379,6 @@
 		260A63191861009E00FECF8E /* IOHandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 260A63181861009E00FECF8E /* IOHandler.cpp */; };
 		260A63171861008E00FECF8E /* IOHandler.h in Headers */ = {isa = PBXBuildFile; fileRef = 260A63161861008E00FECF8E /* IOHandler.h */; };
 		236124A41986B4E2004EFC37 /* IOObject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 236124A21986B4E2004EFC37 /* IOObject.cpp */; };
-		2689006A13353E0E00698AC0 /* IRDynamicChecks.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49CF9829122C70BD007A0B96 /* IRDynamicChecks.cpp */; };
 		2689006D13353E0E00698AC0 /* IRExecutionUnit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C98D3DB118FB96F00E575D0 /* IRExecutionUnit.cpp */; };
 		2689006B13353E0E00698AC0 /* IRForTarget.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49307AAD11DEA4D90081F992 /* IRForTarget.cpp */; };
 		49A71FE7141FFA5C00D59478 /* IRInterpreter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 496B01581406DE8900F830D5 /* IRInterpreter.cpp */; };
@@ -1563,8 +1562,7 @@
 		26792617211CA3E100EE1D10 /* CMakeLists.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = CMakeLists.txt; path = "tools/lldb-vscode/CMakeLists.txt"; sourceTree = "<group>"; };
 		9A1890311F47D5D400394BCA /* CMakeLists.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = CMakeLists.txt; path = TestingSupport/CMakeLists.txt; sourceTree = "<group>"; };
 		AF352EDD22C17BD700D058B6 /* CMakeLists.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = CMakeLists.txt; sourceTree = "<group>"; };
-		4CB443BC1249920C00C13DC2 /* CPPLanguageRuntime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = CPPLanguageRuntime.cpp; path = source/Target/CPPLanguageRuntime.cpp; sourceTree = "<group>"; };
-		4CB443BB1249920C00C13DC2 /* CPPLanguageRuntime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CPPLanguageRuntime.h; path = include/lldb/Target/CPPLanguageRuntime.h; sourceTree = "<group>"; };
+		AF9E360B22DD3BFB000B7776 /* CPPLanguageRuntime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPPLanguageRuntime.cpp; sourceTree = "<group>"; };
 		94B6385B1B8FB174004FE1E4 /* CPlusPlusLanguage.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = CPlusPlusLanguage.cpp; path = Language/CPlusPlus/CPlusPlusLanguage.cpp; sourceTree = "<group>"; };
 		94B6385C1B8FB174004FE1E4 /* CPlusPlusLanguage.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = CPlusPlusLanguage.h; path = Language/CPlusPlus/CPlusPlusLanguage.h; sourceTree = "<group>"; };
 		23CB14FA1D66CCF100EDDDE1 /* CPlusPlusLanguageTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPlusPlusLanguageTest.cpp; sourceTree = "<group>"; };
@@ -2013,8 +2011,6 @@
 		236124A21986B4E2004EFC37 /* IOObject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IOObject.cpp; path = source/Utility/IOObject.cpp; sourceTree = "<group>"; };
 		236124A61986B50E004EFC37 /* IOObject.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = IOObject.h; path = include/lldb/Utility/IOObject.h; sourceTree = "<group>"; };
 		26BC7D6510F1B77400F91463 /* IOStreamMacros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IOStreamMacros.h; path = include/lldb/Core/IOStreamMacros.h; sourceTree = "<group>"; };
-		49CF9829122C70BD007A0B96 /* IRDynamicChecks.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IRDynamicChecks.cpp; path = source/Expression/IRDynamicChecks.cpp; sourceTree = "<group>"; };
-		49CF9833122C718B007A0B96 /* IRDynamicChecks.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IRDynamicChecks.h; path = include/lldb/Expression/IRDynamicChecks.h; sourceTree = "<group>"; };
 		4C98D3DB118FB96F00E575D0 /* IRExecutionUnit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IRExecutionUnit.cpp; path = source/Expression/IRExecutionUnit.cpp; sourceTree = "<group>"; };
 		4C98D3E1118FB98F00E575D0 /* IRExecutionUnit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IRExecutionUnit.h; path = include/lldb/Expression/IRExecutionUnit.h; sourceTree = "<group>"; };
 		49307AAD11DEA4D90081F992 /* IRForTarget.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IRForTarget.cpp; path = ExpressionParser/Clang/IRForTarget.cpp; sourceTree = "<group>"; };
@@ -5494,8 +5490,6 @@
 				4C2479BE1BA39843009C9A7B /* ExpressionParser.h */,
 				26BC7DC310F1B79500F91463 /* DWARFExpression.h */,
 				26BC7ED810F1B86700F91463 /* DWARFExpression.cpp */,
-				49CF9833122C718B007A0B96 /* IRDynamicChecks.h */,
-				49CF9829122C70BD007A0B96 /* IRDynamicChecks.cpp */,
 				49C66B1C17011A43004D1922 /* IRMemoryMap.h */,
 				49DCF6FD170E6B4A0092F75E /* IRMemoryMap.cpp */,
 				4C98D3E1118FB98F00E575D0 /* IRExecutionUnit.h */,
@@ -5679,8 +5673,6 @@
 				23EDE3301926839700F6A132 /* NativeRegisterContext.cpp */,
 				497E7B331188ED300065CCA1 /* ABI.h */,
 				497E7B9D1188F6690065CCA1 /* ABI.cpp */,
-				4CB443BB1249920C00C13DC2 /* CPPLanguageRuntime.h */,
-				4CB443BC1249920C00C13DC2 /* CPPLanguageRuntime.cpp */,
 				26BC7DF110F1B81A00F91463 /* DynamicLoader.h */,
 				26BC7E7710F1B85900F91463 /* DynamicLoader.cpp */,
 				26BC7DF210F1B81A00F91463 /* ExecutionContext.h */,
@@ -6272,6 +6264,7 @@
 		4CCA643B13B40B82003BDF98 /* CPlusPlus */ = {
 			isa = PBXGroup;
 			children = (
+				AF9E360B22DD3BFB000B7776 /* CPPLanguageRuntime.cpp */,
 				4CCA643C13B40B82003BDF98 /* ItaniumABI */,
 			);
 			path = CPlusPlus;
@@ -8127,6 +8120,7 @@
 				AFF81FB320D1CC910010F95E /* PlatformiOSSimulatorCoreSimulatorSupport.mm in Sources */,
 				942612F71B95000000EF842E /* LanguageCategory.cpp in Sources */,
 				AF116BEF20CF234B0071093F /* DebugNamesDWARFIndex.cpp in Sources */,
+				AF9E360C22DD3BFC000B7776 /* CPPLanguageRuntime.cpp in Sources */,
 				2689005E13353E0E00698AC0 /* ClangASTSource.cpp in Sources */,
 				AFF1273622276F1600C25726 /* LocateSymbolFile.cpp in Sources */,
 				2689005F13353E0E00698AC0 /* ClangFunctionCaller.cpp in Sources */,
@@ -8145,7 +8139,6 @@
 				AFC234091AF85CE100CDE8B6 /* CommandObjectLanguage.cpp in Sources */,
 				2689006813353E0E00698AC0 /* ASTResultSynthesizer.cpp in Sources */,
 				2689006913353E0E00698AC0 /* ASTStructExtractor.cpp in Sources */,
-				2689006A13353E0E00698AC0 /* IRDynamicChecks.cpp in Sources */,
 				2689006B13353E0E00698AC0 /* IRForTarget.cpp in Sources */,
 				AF2BA6EC1A707E3400C5248A /* UriParser.cpp in Sources */,
 				2689006D13353E0E00698AC0 /* IRExecutionUnit.cpp in Sources */,
@@ -8321,7 +8314,6 @@
 				268900E813353E6F00698AC0 /* ABI.cpp in Sources */,
 				4C56543119D1EFAA002E9C44 /* ThreadPlanPython.cpp in Sources */,
 				26AB92121819D74600E63F3E /* DWARFDataExtractor.cpp in Sources */,
-				268900E913353E6F00698AC0 /* CPPLanguageRuntime.cpp in Sources */,
 				4CD44D4220B777850003557C /* DWARFBaseDIE.cpp in Sources */,
 				9485545A1DCBAE3B00345FF5 /* RenderScriptScriptGroup.cpp in Sources */,
 				268900EA13353E6F00698AC0 /* DynamicLoader.cpp in Sources */,

From bf20b2ace68d300665cf920050fda50003bd1096 Mon Sep 17 00:00:00 2001
From: Eric Christopher <echristo@gmail.com>
Date: Tue, 16 Jul 2019 00:02:40 +0000
Subject: [PATCH 189/451] Temporarily revert "add -fthinlto-index= option to
 clang-cl"

This is causing testsuite failures on (at least) darwin release+asserts.

This reverts commit r366146.

llvm-svn: 366157
---
 clang/include/clang/Driver/Options.td  | 2 +-
 clang/test/Driver/cl-thinlto-backend.c | 9 ---------
 2 files changed, 1 insertion(+), 10 deletions(-)
 delete mode 100644 clang/test/Driver/cl-thinlto-backend.c

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index dfd27fab796e3..957483c318647 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1270,7 +1270,7 @@ def flto_jobs_EQ : Joined<["-"], "flto-jobs=">,
            "of 0 means the number of threads will be derived from "
            "the number of CPUs detected)">;
 def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">,
-  Flags<[CoreOption, CC1Option]>, Group<f_Group>,
+  Flags<[CC1Option]>, Group<f_Group>,
   HelpText<"Perform ThinLTO importing using provided function summary index">;
 def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
                                 Group<f_Group>, Flags<[DriverOption, CoreOption]>;
diff --git a/clang/test/Driver/cl-thinlto-backend.c b/clang/test/Driver/cl-thinlto-backend.c
deleted file mode 100644
index a948c4ea33d9c..0000000000000
--- a/clang/test/Driver/cl-thinlto-backend.c
+++ /dev/null
@@ -1,9 +0,0 @@
-// RUN: %clang_cl -c -flto=thin -Fo%t.obj %s
-// RUN: llvm-lto2 run -thinlto-distributed-indexes -o %t.exe %t.obj
-
-// -fthinlto_index should be passed to cc1
-// RUN: %clang_cl -### -c -fthinlto-index=%t.thinlto.bc -Fo%t1.obj \
-// RUN:     %t.obj 2>&1 | FileCheck %s
-
-// CHECK: -fthinlto-index=
-// CHECK: "-x" "ir"

From 509903e887263182f580ae75c7fee2eaedb66fae Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 16 Jul 2019 00:37:17 +0000
Subject: [PATCH 190/451] [DebugInfo] Move function from line table to the
 prologue (NFC)

In LLDB, when parsing type units, we don't need to parse the whole line
table. Instead, we only need to parse the "support files" from the line
table prologue.

To make that possible, this patch moves the respective functions from
the LineTable into the Prologue. Because I don't think users of the
LineTable should have to know that these files come from the Prologue,
I've left the original methods in place, and made them redirect to the
Prologue.

Differential revision: https://reviews.llvm.org/D64774

llvm-svn: 366158
---
 .../llvm/DebugInfo/DWARF/DWARFDebugLine.h     | 26 ++++---
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp   | 67 +++++++++----------
 2 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 9a3ad2b08c99b..e7425c1923737 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -121,6 +121,17 @@ class DWARFDebugLine {
       return LineBase + (int8_t)LineRange - 1;
     }
 
+    /// Get DWARF-version aware access to the file name entry at the provided
+    /// index.
+    const llvm::DWARFDebugLine::FileNameEntry &
+    getFileNameEntry(uint64_t Index) const;
+
+    bool hasFileAtIndex(uint64_t FileIndex) const;
+
+    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
+                            DILineInfoSpecifier::FileLineInfoKind Kind,
+                            std::string &Result) const;
+
     void clear();
     void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
     Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
@@ -240,16 +251,20 @@ class DWARFDebugLine {
     bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
                             std::vector<uint32_t> &Result) const;
 
-    bool hasFileAtIndex(uint64_t FileIndex) const;
+    bool hasFileAtIndex(uint64_t FileIndex) const {
+      return Prologue.hasFileAtIndex(FileIndex);
+    }
 
     /// Extracts filename by its index in filename table in prologue.
     /// In Dwarf 4, the files are 1-indexed and the current compilation file
     /// name is not represented in the list. In DWARF v5, the files are
     /// 0-indexed and the primary source file has the index 0.
     /// Returns true on success.
-    bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
+    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
                             DILineInfoSpecifier::FileLineInfoKind Kind,
-                            std::string &Result) const;
+                            std::string &Result) const {
+      return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
+    }
 
     /// Fills the Result argument with the file and line information
     /// corresponding to Address. Returns true on success.
@@ -268,11 +283,6 @@ class DWARFDebugLine {
         std::function<void(Error)> RecoverableErrorCallback,
         raw_ostream *OS = nullptr);
 
-    /// Get DWARF-version aware access to the file name entry at the provided
-    /// index.
-    const llvm::DWARFDebugLine::FileNameEntry &
-        getFileNameEntry(uint64_t Index) const;
-
     using RowVector = std::vector<Row>;
     using RowIter = RowVector::const_iterator;
     using SequenceVector = std::vector<Sequence>;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 9f9aaabf1e89d..8a621084710e4 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -66,6 +66,26 @@ void DWARFDebugLine::ContentTypeTracker::trackContentType(
 
 DWARFDebugLine::Prologue::Prologue() { clear(); }
 
+bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const {
+  uint16_t DwarfVersion = getVersion();
+  assert(DwarfVersion != 0 &&
+         "line table prologue has no dwarf version information");
+  if (DwarfVersion >= 5)
+    return FileIndex < FileNames.size();
+  return FileIndex != 0 && FileIndex <= FileNames.size();
+}
+
+const llvm::DWARFDebugLine::FileNameEntry &
+DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const {
+  uint16_t DwarfVersion = getVersion();
+  assert(DwarfVersion != 0 &&
+         "line table prologue has no dwarf version information");
+  // In DWARF v5 the file names are 0-indexed.
+  if (DwarfVersion >= 5)
+    return FileNames[Index];
+  return FileNames[Index - 1];
+}
+
 void DWARFDebugLine::Prologue::clear() {
   TotalLength = PrologueLength = 0;
   SegSelectorSize = 0;
@@ -968,30 +988,11 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
   return true;
 }
 
-bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
-  uint16_t DwarfVersion = Prologue.getVersion();
-  assert(DwarfVersion != 0 && "LineTable has no dwarf version information");
-  if (DwarfVersion >= 5)
-    return FileIndex < Prologue.FileNames.size();
-  return FileIndex != 0 && FileIndex <= Prologue.FileNames.size();
-}
-
-const llvm::DWARFDebugLine::FileNameEntry &
-DWARFDebugLine::LineTable::getFileNameEntry(uint64_t Index) const {
-  uint16_t DwarfVersion = Prologue.getVersion();
-  assert(DwarfVersion != 0 && "LineTable has no dwarf version information");
-  // In DWARF v5 the file names are 0-indexed.
-  if (DwarfVersion >= 5)
-    return Prologue.FileNames[Index];
-  else
-    return Prologue.FileNames[Index - 1];
-}
-
 Optional<StringRef> DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex,
                                                                 FileLineInfoKind Kind) const {
-  if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
+  if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex))
     return None;
-  const FileNameEntry &Entry = getFileNameEntry(FileIndex);
+  const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex);
   if (Optional<const char *> source = Entry.Source.getAsCString())
     return StringRef(*source);
   return None;
@@ -1005,10 +1006,10 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
          sys::path::is_absolute(Path, sys::path::Style::windows);
 }
 
-bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
-                                                   const char *CompDir,
-                                                   FileLineInfoKind Kind,
-                                                   std::string &Result) const {
+bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex,
+                                                  StringRef CompDir,
+                                                  FileLineInfoKind Kind,
+                                                  std::string &Result) const {
   if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return false;
   const FileNameEntry &Entry = getFileNameEntry(FileIndex);
@@ -1022,20 +1023,18 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
   SmallString<16> FilePath;
   StringRef IncludeDir;
   // Be defensive about the contents of Entry.
-  if (Prologue.getVersion() >= 5) {
-    if (Entry.DirIdx < Prologue.IncludeDirectories.size())
-      IncludeDir =
-          Prologue.IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
+  if (getVersion() >= 5) {
+    if (Entry.DirIdx < IncludeDirectories.size())
+      IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
   } else {
-    if (0 < Entry.DirIdx && Entry.DirIdx <= Prologue.IncludeDirectories.size())
-      IncludeDir = Prologue.IncludeDirectories[Entry.DirIdx - 1]
-                       .getAsCString()
-                       .getValue();
+    if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size())
+      IncludeDir =
+          IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue();
 
     // We may still need to append compilation directory of compile unit.
     // We know that FileName is not absolute, the only way to have an
     // absolute path at this point would be if IncludeDir is absolute.
-    if (CompDir && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
+    if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
       sys::path::append(FilePath, CompDir);
   }
 

From 50f0c824532886bea5b7536378a16ac9e3b12f9c Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Tue, 16 Jul 2019 00:57:50 +0000
Subject: [PATCH 191/451] Allow for vendor prefixes in a list test

Summary:
Preprocessor/init.c contains a line that explicitly checks for the
string

__VERSION__ "Clang{{.*}}

It's valid to have a toolchain configured to emit a vendor prefix
before the word Clang. e.g.

__VERSION__ "Vendor Clang{{.*}}

Subscribers: fedor.sergeev, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64772

llvm-svn: 366159
---
 clang/test/Preprocessor/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index fce85e05f63f5..d2d7efc0ae709 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -8169,7 +8169,7 @@
 // SPARC:#define __UINT_LEAST8_MAX__ 255
 // SPARC:#define __UINT_LEAST8_TYPE__ unsigned char
 // SPARC:#define __USER_LABEL_PREFIX__
-// SPARC:#define __VERSION__ "Clang{{.*}}
+// SPARC:#define __VERSION__ "{{.*}}Clang{{.*}}
 // SPARC:#define __WCHAR_MAX__ 2147483647
 // SPARC:#define __WCHAR_TYPE__ int
 // SPARC:#define __WCHAR_WIDTH__ 32

From 01ee172e9e4a32a326105b6817bb801a07f44bfa Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 16 Jul 2019 00:59:04 +0000
Subject: [PATCH 192/451] Revert "[DebugInfo] Move function from line table to
 the prologue (NFC)"

This broke LLD, which I didn't have enabled.

llvm-svn: 366160
---
 .../llvm/DebugInfo/DWARF/DWARFDebugLine.h     | 26 +++----
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp   | 67 ++++++++++---------
 2 files changed, 42 insertions(+), 51 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index e7425c1923737..9a3ad2b08c99b 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -121,17 +121,6 @@ class DWARFDebugLine {
       return LineBase + (int8_t)LineRange - 1;
     }
 
-    /// Get DWARF-version aware access to the file name entry at the provided
-    /// index.
-    const llvm::DWARFDebugLine::FileNameEntry &
-    getFileNameEntry(uint64_t Index) const;
-
-    bool hasFileAtIndex(uint64_t FileIndex) const;
-
-    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
-                            DILineInfoSpecifier::FileLineInfoKind Kind,
-                            std::string &Result) const;
-
     void clear();
     void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
     Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
@@ -251,20 +240,16 @@ class DWARFDebugLine {
     bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
                             std::vector<uint32_t> &Result) const;
 
-    bool hasFileAtIndex(uint64_t FileIndex) const {
-      return Prologue.hasFileAtIndex(FileIndex);
-    }
+    bool hasFileAtIndex(uint64_t FileIndex) const;
 
     /// Extracts filename by its index in filename table in prologue.
     /// In Dwarf 4, the files are 1-indexed and the current compilation file
     /// name is not represented in the list. In DWARF v5, the files are
     /// 0-indexed and the primary source file has the index 0.
     /// Returns true on success.
-    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
+    bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
                             DILineInfoSpecifier::FileLineInfoKind Kind,
-                            std::string &Result) const {
-      return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
-    }
+                            std::string &Result) const;
 
     /// Fills the Result argument with the file and line information
     /// corresponding to Address. Returns true on success.
@@ -283,6 +268,11 @@ class DWARFDebugLine {
         std::function<void(Error)> RecoverableErrorCallback,
         raw_ostream *OS = nullptr);
 
+    /// Get DWARF-version aware access to the file name entry at the provided
+    /// index.
+    const llvm::DWARFDebugLine::FileNameEntry &
+        getFileNameEntry(uint64_t Index) const;
+
     using RowVector = std::vector<Row>;
     using RowIter = RowVector::const_iterator;
     using SequenceVector = std::vector<Sequence>;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 8a621084710e4..9f9aaabf1e89d 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -66,26 +66,6 @@ void DWARFDebugLine::ContentTypeTracker::trackContentType(
 
 DWARFDebugLine::Prologue::Prologue() { clear(); }
 
-bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const {
-  uint16_t DwarfVersion = getVersion();
-  assert(DwarfVersion != 0 &&
-         "line table prologue has no dwarf version information");
-  if (DwarfVersion >= 5)
-    return FileIndex < FileNames.size();
-  return FileIndex != 0 && FileIndex <= FileNames.size();
-}
-
-const llvm::DWARFDebugLine::FileNameEntry &
-DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const {
-  uint16_t DwarfVersion = getVersion();
-  assert(DwarfVersion != 0 &&
-         "line table prologue has no dwarf version information");
-  // In DWARF v5 the file names are 0-indexed.
-  if (DwarfVersion >= 5)
-    return FileNames[Index];
-  return FileNames[Index - 1];
-}
-
 void DWARFDebugLine::Prologue::clear() {
   TotalLength = PrologueLength = 0;
   SegSelectorSize = 0;
@@ -988,11 +968,30 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
   return true;
 }
 
+bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
+  uint16_t DwarfVersion = Prologue.getVersion();
+  assert(DwarfVersion != 0 && "LineTable has no dwarf version information");
+  if (DwarfVersion >= 5)
+    return FileIndex < Prologue.FileNames.size();
+  return FileIndex != 0 && FileIndex <= Prologue.FileNames.size();
+}
+
+const llvm::DWARFDebugLine::FileNameEntry &
+DWARFDebugLine::LineTable::getFileNameEntry(uint64_t Index) const {
+  uint16_t DwarfVersion = Prologue.getVersion();
+  assert(DwarfVersion != 0 && "LineTable has no dwarf version information");
+  // In DWARF v5 the file names are 0-indexed.
+  if (DwarfVersion >= 5)
+    return Prologue.FileNames[Index];
+  else
+    return Prologue.FileNames[Index - 1];
+}
+
 Optional<StringRef> DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex,
                                                                 FileLineInfoKind Kind) const {
-  if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex))
+  if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return None;
-  const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex);
+  const FileNameEntry &Entry = getFileNameEntry(FileIndex);
   if (Optional<const char *> source = Entry.Source.getAsCString())
     return StringRef(*source);
   return None;
@@ -1006,10 +1005,10 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
          sys::path::is_absolute(Path, sys::path::Style::windows);
 }
 
-bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex,
-                                                  StringRef CompDir,
-                                                  FileLineInfoKind Kind,
-                                                  std::string &Result) const {
+bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
+                                                   const char *CompDir,
+                                                   FileLineInfoKind Kind,
+                                                   std::string &Result) const {
   if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return false;
   const FileNameEntry &Entry = getFileNameEntry(FileIndex);
@@ -1023,18 +1022,20 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex,
   SmallString<16> FilePath;
   StringRef IncludeDir;
   // Be defensive about the contents of Entry.
-  if (getVersion() >= 5) {
-    if (Entry.DirIdx < IncludeDirectories.size())
-      IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
-  } else {
-    if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size())
+  if (Prologue.getVersion() >= 5) {
+    if (Entry.DirIdx < Prologue.IncludeDirectories.size())
       IncludeDir =
-          IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue();
+          Prologue.IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
+  } else {
+    if (0 < Entry.DirIdx && Entry.DirIdx <= Prologue.IncludeDirectories.size())
+      IncludeDir = Prologue.IncludeDirectories[Entry.DirIdx - 1]
+                       .getAsCString()
+                       .getValue();
 
     // We may still need to append compilation directory of compile unit.
     // We know that FileName is not absolute, the only way to have an
     // absolute path at this point would be if IncludeDir is absolute.
-    if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
+    if (CompDir && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
       sys::path::append(FilePath, CompDir);
   }
 

From 0d121273181f89b5296b02084fbb967d159b2c69 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Tue, 16 Jul 2019 01:02:32 +0000
Subject: [PATCH 193/451] [Target] Remove unused method
 Target::GetDefaultClangModuleSearchPaths

llvm-svn: 366161
---
 lldb/include/lldb/Target/Target.h | 2 --
 lldb/source/Target/Target.cpp     | 7 -------
 2 files changed, 9 deletions(-)

diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h
index 875a8b1e2c187..4ed11afc31ba6 100644
--- a/lldb/include/lldb/Target/Target.h
+++ b/lldb/include/lldb/Target/Target.h
@@ -491,8 +491,6 @@ class Target : public std::enable_shared_from_this<Target>,
 
   static FileSpecList GetDefaultDebugFileSearchPaths();
 
-  static FileSpecList GetDefaultClangModuleSearchPaths();
-
   static ArchSpec GetDefaultArchitecture();
 
   static void SetDefaultArchitecture(const ArchSpec &arch);
diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp
index ac15749663f4c..4941cb585c554 100644
--- a/lldb/source/Target/Target.cpp
+++ b/lldb/source/Target/Target.cpp
@@ -2330,13 +2330,6 @@ FileSpecList Target::GetDefaultDebugFileSearchPaths() {
   return FileSpecList();
 }
 
-FileSpecList Target::GetDefaultClangModuleSearchPaths() {
-  TargetPropertiesSP properties_sp(Target::GetGlobalProperties());
-  if (properties_sp)
-    return properties_sp->GetClangModuleSearchPaths();
-  return FileSpecList();
-}
-
 ArchSpec Target::GetDefaultArchitecture() {
   TargetPropertiesSP properties_sp(Target::GetGlobalProperties());
   if (properties_sp)

From 543ba4e9e0c421bedaea2d8a0f1965092cec300e Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 16 Jul 2019 01:03:06 +0000
Subject: [PATCH 194/451] [InstructionSimplify] Apply sext/trunc after pointer
 stripping

Summary:
- As the pointer stripping can now trace through `addrspacecast`, we need
  to sext/trunc the offset to ensure it has the same width as the
  pointer after stripping.

Reviewers: jdoerfert

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64768

llvm-svn: 366162
---
 llvm/lib/Analysis/InstructionSimplify.cpp    |  4 ++++
 llvm/test/Transforms/InstSimplify/compare.ll | 11 ++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index c0d69f9275d1d..e34bf6f4e43f5 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -660,6 +660,10 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
   APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
 
   V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
+  // As that strip may trace through `addrspacecast`, need to sext or trunc
+  // the offset calculated.
+  IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
+  Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth());
 
   Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
   if (V->getType()->isVectorTy())
diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll
index 899f198d48a31..570239eaf0c6e 100644
--- a/llvm/test/Transforms/InstSimplify/compare.ll
+++ b/llvm/test/Transforms/InstSimplify/compare.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instsimplify -S | FileCheck %s
-target datalayout = "p:32:32"
+target datalayout = "p:32:32-p1:64:64"
 
 define i1 @ptrtoint() {
 ; CHECK-LABEL: @ptrtoint(
@@ -1358,4 +1358,13 @@ define i1 @constant_fold_null_inttoptr() {
   ret i1 %x
 }
 
+; CHECK-LABEL: @cmp_through_addrspacecast(
+; CHECK-NEXT: ret i1 true
+define i1 @cmp_through_addrspacecast(i32 addrspace(1)* %p1) {
+  %p0 = addrspacecast i32 addrspace(1)* %p1 to i32*
+  %p0.1 = getelementptr inbounds i32, i32* %p0, i64 1
+  %cmp = icmp ne i32* %p0, %p0.1
+  ret i1 %cmp
+}
+
 attributes #0 = { "null-pointer-is-valid"="true" }

From fa52e00c85ce3feeec14be34265781f721b966c0 Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Tue, 16 Jul 2019 01:13:36 +0000
Subject: [PATCH 195/451] [Sema] Suppress additional warnings for C's zero
 initializer

Summary:
D28148 relaxed some checks for assigning { 0 } to a structure for all C
standards, but it failed to handle structures with non-integer
subobjects. Relax -Wmissing-braces checks for such structures, and add
some additional tests.

This fixes PR39931.

Patch By: al3xtjames

Reviewed By: Lekensteyn

Differential Revision: https://reviews.llvm.org/D61838

llvm-svn: 366163
---
 clang/lib/AST/Expr.cpp             | 4 ++--
 clang/test/Sema/zero-initializer.c | 7 +++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index f8017bb7adebe..10ab2bf72b72e 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2303,11 +2303,11 @@ bool InitListExpr::isTransparent() const {
 bool InitListExpr::isIdiomaticZeroInitializer(const LangOptions &LangOpts) const {
   assert(isSyntacticForm() && "only test syntactic form as zero initializer");
 
-  if (LangOpts.CPlusPlus || getNumInits() != 1) {
+  if (LangOpts.CPlusPlus || getNumInits() != 1 || !getInit(0)) {
     return false;
   }
 
-  const IntegerLiteral *Lit = dyn_cast<IntegerLiteral>(getInit(0));
+  const IntegerLiteral *Lit = dyn_cast<IntegerLiteral>(getInit(0)->IgnoreImplicit());
   return Lit && Lit->getValue() == 0;
 }
 
diff --git a/clang/test/Sema/zero-initializer.c b/clang/test/Sema/zero-initializer.c
index 0ab410d4c6d55..e54021a582c52 100644
--- a/clang/test/Sema/zero-initializer.c
+++ b/clang/test/Sema/zero-initializer.c
@@ -7,6 +7,8 @@ struct A { int a; };
 struct B { struct A a; };
 struct C { struct B b; };
 struct D { struct C c; int n; };
+struct E { short e; };
+struct F { struct E e; int n; };
 
 int main(void)
 {
@@ -23,6 +25,9 @@ int main(void)
   struct C p = { 0 }; // no-warning
   struct C q = { 9 }; // warning suppressed for struct with single element
   struct D r = { 9 }; // expected-warning {{suggest braces around initialization of subobject}} expected-warning {{missing field 'n' initializer}}
+  struct F s = { 0 }; // no-warning
+  struct F t = { 9 }; // expected-warning {{suggest braces around initialization of subobject}} expected-warning {{missing field 'n' initializer}}
+
   f = (struct foo ) { 0 }; // no-warning
   g = (struct foo ) { 9 }; // expected-warning {{missing field 'y' initializer}}
   h = (struct foo ) { 9, 9 }; // no-warning
@@ -36,6 +41,8 @@ int main(void)
   p = (struct C) { 0 }; // no-warning
   q = (struct C) { 9 }; // warning suppressed for struct with single element
   r = (struct D) { 9 }; // expected-warning {{suggest braces around initialization of subobject}} expected-warning {{missing field 'n' initializer}}
+  s = (struct F) { 0 }; // no-warning
+  t = (struct F) { 9 }; // expected-warning {{suggest braces around initialization of subobject}} expected-warning {{missing field 'n' initializer}}
 
   return 0;
 }

From ca16d280f7ed9509398ba8869a123c9f8ede72f7 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 16 Jul 2019 01:21:25 +0000
Subject: [PATCH 196/451] Re-land "[DebugInfo] Move function from line table to
 the prologue (NFC)"

In LLDB, when parsing type units, we don't need to parse the whole line
table. Instead, we only need to parse the "support files" from the line
table prologue.

To make that possible, this patch moves the respective functions from
the LineTable into the Prologue. Because I don't think users of the
LineTable should have to know that these files come from the Prologue,
I've left the original methods in place, and made them redirect to the
Prologue.

Differential revision: https://reviews.llvm.org/D64774

llvm-svn: 366164
---
 lld/ELF/InputFiles.cpp                        |  2 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugLine.h     | 26 ++++---
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp   | 67 +++++++++----------
 3 files changed, 52 insertions(+), 43 deletions(-)

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 470d877f3fbf3..89b178decba2a 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -320,7 +320,7 @@ ObjFile<ELFT>::getVariableLoc(StringRef name) {
   // Take file name string from line table.
   std::string fileName;
   if (!it->second.lt->getFileNameByIndex(
-          it->second.file, nullptr,
+          it->second.file, {},
           DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, fileName))
     return None;
 
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 9a3ad2b08c99b..e7425c1923737 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -121,6 +121,17 @@ class DWARFDebugLine {
       return LineBase + (int8_t)LineRange - 1;
     }
 
+    /// Get DWARF-version aware access to the file name entry at the provided
+    /// index.
+    const llvm::DWARFDebugLine::FileNameEntry &
+    getFileNameEntry(uint64_t Index) const;
+
+    bool hasFileAtIndex(uint64_t FileIndex) const;
+
+    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
+                            DILineInfoSpecifier::FileLineInfoKind Kind,
+                            std::string &Result) const;
+
     void clear();
     void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const;
     Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr,
@@ -240,16 +251,20 @@ class DWARFDebugLine {
     bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size,
                             std::vector<uint32_t> &Result) const;
 
-    bool hasFileAtIndex(uint64_t FileIndex) const;
+    bool hasFileAtIndex(uint64_t FileIndex) const {
+      return Prologue.hasFileAtIndex(FileIndex);
+    }
 
     /// Extracts filename by its index in filename table in prologue.
     /// In Dwarf 4, the files are 1-indexed and the current compilation file
     /// name is not represented in the list. In DWARF v5, the files are
     /// 0-indexed and the primary source file has the index 0.
     /// Returns true on success.
-    bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
+    bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir,
                             DILineInfoSpecifier::FileLineInfoKind Kind,
-                            std::string &Result) const;
+                            std::string &Result) const {
+      return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result);
+    }
 
     /// Fills the Result argument with the file and line information
     /// corresponding to Address. Returns true on success.
@@ -268,11 +283,6 @@ class DWARFDebugLine {
         std::function<void(Error)> RecoverableErrorCallback,
         raw_ostream *OS = nullptr);
 
-    /// Get DWARF-version aware access to the file name entry at the provided
-    /// index.
-    const llvm::DWARFDebugLine::FileNameEntry &
-        getFileNameEntry(uint64_t Index) const;
-
     using RowVector = std::vector<Row>;
     using RowIter = RowVector::const_iterator;
     using SequenceVector = std::vector<Sequence>;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 9f9aaabf1e89d..8a621084710e4 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -66,6 +66,26 @@ void DWARFDebugLine::ContentTypeTracker::trackContentType(
 
 DWARFDebugLine::Prologue::Prologue() { clear(); }
 
+bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const {
+  uint16_t DwarfVersion = getVersion();
+  assert(DwarfVersion != 0 &&
+         "line table prologue has no dwarf version information");
+  if (DwarfVersion >= 5)
+    return FileIndex < FileNames.size();
+  return FileIndex != 0 && FileIndex <= FileNames.size();
+}
+
+const llvm::DWARFDebugLine::FileNameEntry &
+DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const {
+  uint16_t DwarfVersion = getVersion();
+  assert(DwarfVersion != 0 &&
+         "line table prologue has no dwarf version information");
+  // In DWARF v5 the file names are 0-indexed.
+  if (DwarfVersion >= 5)
+    return FileNames[Index];
+  return FileNames[Index - 1];
+}
+
 void DWARFDebugLine::Prologue::clear() {
   TotalLength = PrologueLength = 0;
   SegSelectorSize = 0;
@@ -968,30 +988,11 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl(
   return true;
 }
 
-bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
-  uint16_t DwarfVersion = Prologue.getVersion();
-  assert(DwarfVersion != 0 && "LineTable has no dwarf version information");
-  if (DwarfVersion >= 5)
-    return FileIndex < Prologue.FileNames.size();
-  return FileIndex != 0 && FileIndex <= Prologue.FileNames.size();
-}
-
-const llvm::DWARFDebugLine::FileNameEntry &
-DWARFDebugLine::LineTable::getFileNameEntry(uint64_t Index) const {
-  uint16_t DwarfVersion = Prologue.getVersion();
-  assert(DwarfVersion != 0 && "LineTable has no dwarf version information");
-  // In DWARF v5 the file names are 0-indexed.
-  if (DwarfVersion >= 5)
-    return Prologue.FileNames[Index];
-  else
-    return Prologue.FileNames[Index - 1];
-}
-
 Optional<StringRef> DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex,
                                                                 FileLineInfoKind Kind) const {
-  if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
+  if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex))
     return None;
-  const FileNameEntry &Entry = getFileNameEntry(FileIndex);
+  const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex);
   if (Optional<const char *> source = Entry.Source.getAsCString())
     return StringRef(*source);
   return None;
@@ -1005,10 +1006,10 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
          sys::path::is_absolute(Path, sys::path::Style::windows);
 }
 
-bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
-                                                   const char *CompDir,
-                                                   FileLineInfoKind Kind,
-                                                   std::string &Result) const {
+bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex,
+                                                  StringRef CompDir,
+                                                  FileLineInfoKind Kind,
+                                                  std::string &Result) const {
   if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return false;
   const FileNameEntry &Entry = getFileNameEntry(FileIndex);
@@ -1022,20 +1023,18 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
   SmallString<16> FilePath;
   StringRef IncludeDir;
   // Be defensive about the contents of Entry.
-  if (Prologue.getVersion() >= 5) {
-    if (Entry.DirIdx < Prologue.IncludeDirectories.size())
-      IncludeDir =
-          Prologue.IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
+  if (getVersion() >= 5) {
+    if (Entry.DirIdx < IncludeDirectories.size())
+      IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue();
   } else {
-    if (0 < Entry.DirIdx && Entry.DirIdx <= Prologue.IncludeDirectories.size())
-      IncludeDir = Prologue.IncludeDirectories[Entry.DirIdx - 1]
-                       .getAsCString()
-                       .getValue();
+    if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size())
+      IncludeDir =
+          IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue();
 
     // We may still need to append compilation directory of compile unit.
     // We know that FileName is not absolute, the only way to have an
     // absolute path at this point would be if IncludeDir is absolute.
-    if (CompDir && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
+    if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir))
       sys::path::append(FilePath, CompDir);
   }
 

From 492ce8cc8b0831924f7f0479f37f2e708a8dad3f Mon Sep 17 00:00:00 2001
From: Bob Haarman <llvm@inglorion.net>
Date: Tue, 16 Jul 2019 01:35:49 +0000
Subject: [PATCH 197/451] reland "add -fthinlto-index= option to clang-cl"

Summary:
This is a reland of r366146, adding in the previously missing '--'
flag that prevents filenames from being interpreted as flags.

Original description:
This adds a -fthinlto-index= option to clang-cl, which allows it to
be used to drive ThinLTO backend passes. This allows clang-cl to be
used for distributed ThinLTO.

Tags: #clang
llvm-svn: 366165
---
 clang/include/clang/Driver/Options.td  | 2 +-
 clang/test/Driver/cl-thinlto-backend.c | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Driver/cl-thinlto-backend.c

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 957483c318647..dfd27fab796e3 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1270,7 +1270,7 @@ def flto_jobs_EQ : Joined<["-"], "flto-jobs=">,
            "of 0 means the number of threads will be derived from "
            "the number of CPUs detected)">;
 def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">,
-  Flags<[CC1Option]>, Group<f_Group>,
+  Flags<[CoreOption, CC1Option]>, Group<f_Group>,
   HelpText<"Perform ThinLTO importing using provided function summary index">;
 def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">,
                                 Group<f_Group>, Flags<[DriverOption, CoreOption]>;
diff --git a/clang/test/Driver/cl-thinlto-backend.c b/clang/test/Driver/cl-thinlto-backend.c
new file mode 100644
index 0000000000000..4697a994906ab
--- /dev/null
+++ b/clang/test/Driver/cl-thinlto-backend.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cl -c -flto=thin -Fo%t.obj -- %s
+// RUN: llvm-lto2 run -thinlto-distributed-indexes -o %t.exe %t.obj
+
+// -fthinlto-index= should be passed to cc1
+// RUN: %clang_cl -### -c -fthinlto-index=%t.thinlto.bc -Fo%t1.obj \
+// RUN:     -- %t.obj 2>&1 | FileCheck %s
+
+// CHECK: -fthinlto-index=
+// CHECK: "-x" "ir"

From 49e14cefbe42262b0dc6e4daa8ac4e48fed8cec5 Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Tue, 16 Jul 2019 02:05:52 +0000
Subject: [PATCH 198/451] Change a lit test to permit vendor specific clang
 version

A test manually checks for the string `__VERSION__ "Clang`. This needs
to permit vendor specific variants.

llvm-svn: 366166
---
 clang/test/Preprocessor/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c
index d2d7efc0ae709..954f02a014344 100644
--- a/clang/test/Preprocessor/init.c
+++ b/clang/test/Preprocessor/init.c
@@ -9041,7 +9041,7 @@
 // X86_64-CLOUDABI:#define __UINT_LEAST8_MAX__ 255
 // X86_64-CLOUDABI:#define __UINT_LEAST8_TYPE__ unsigned char
 // X86_64-CLOUDABI:#define __USER_LABEL_PREFIX__
-// X86_64-CLOUDABI:#define __VERSION__ "Clang{{.*}}
+// X86_64-CLOUDABI:#define __VERSION__ "{{.*}}Clang{{.*}}
 // X86_64-CLOUDABI:#define __WCHAR_MAX__ 2147483647
 // X86_64-CLOUDABI:#define __WCHAR_TYPE__ int
 // X86_64-CLOUDABI:#define __WCHAR_WIDTH__ 32

From 1739b700b17c93f6ad21cea2fc7e3febba51d22c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 02:46:05 +0000
Subject: [PATCH 199/451] AMDGPU: Avoid code predicates for extload PatFrags

Use the MemoryVT field. This will be necessary for tablegen to
automatically handle patterns for GlobalISel.

Doesn't handle the d16 lo/hi patterns. Those are a special case since
they involve the custom node type.

llvm-svn: 366168
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  | 45 +++++++------------
 llvm/lib/Target/AMDGPU/BUFInstructions.td     | 27 +++++++----
 .../Target/AMDGPU/EvergreenInstructions.td    |  4 --
 llvm/lib/Target/AMDGPU/FLATInstructions.td    | 18 +++++---
 llvm/lib/Target/AMDGPU/R600Instructions.td    | 26 +++++++++++
 5 files changed, 72 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index b29b0e7b17e3d..9e9510e0fa4a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -379,27 +379,6 @@ class FlatStoreAddress : CodePatPred<[{
          AS == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
-class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
-                                              (ld_node node:$ptr), [{
-  LoadSDNode *L = cast<LoadSDNode>(N);
-  return L->getExtensionType() == ISD::ZEXTLOAD ||
-         L->getExtensionType() == ISD::EXTLOAD;
-}]>;
-
-def az_extload : AZExtLoadBase <unindexedload>;
-
-def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
-
-def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-
-def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
-  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-
 class PrivateLoad <SDPatternOperator op> : LoadFrag <op>, PrivateAddress;
 class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
 
@@ -419,9 +398,11 @@ class ConstantLoad <SDPatternOperator op> : LoadFrag <op>, ConstantAddress;
 
 
 def load_private : PrivateLoad <load>;
-def az_extloadi8_private : PrivateLoad <az_extloadi8>;
+def extloadi8_private : PrivateLoad <extloadi8>;
+def zextloadi8_private : PrivateLoad <zextloadi8>;
 def sextloadi8_private : PrivateLoad <sextloadi8>;
-def az_extloadi16_private : PrivateLoad <az_extloadi16>;
+def extloadi16_private : PrivateLoad <extloadi16>;
+def zextloadi16_private : PrivateLoad <zextloadi16>;
 def sextloadi16_private : PrivateLoad <sextloadi16>;
 
 def store_private : PrivateStore <store>;
@@ -433,9 +414,11 @@ def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
 
 def load_global : GlobalLoad <load>;
 def sextloadi8_global : GlobalLoad <sextloadi8>;
-def az_extloadi8_global : GlobalLoad <az_extloadi8>;
+def extloadi8_global : GlobalLoad <extloadi8>;
+def zextloadi8_global : GlobalLoad <zextloadi8>;
 def sextloadi16_global : GlobalLoad <sextloadi16>;
-def az_extloadi16_global : GlobalLoad <az_extloadi16>;
+def extloadi16_global : GlobalLoad <extloadi16>;
+def zextloadi16_global : GlobalLoad <zextloadi16>;
 def atomic_load_global : GlobalLoad<atomic_load>;
 
 def store_global : GlobalStore <store>;
@@ -479,9 +462,11 @@ def store_align16_local : Aligned16Bytes <
 >;
 
 def load_flat          : FlatLoad <load>;
-def az_extloadi8_flat  : FlatLoad <az_extloadi8>;
+def extloadi8_flat  : FlatLoad <extloadi8>;
+def zextloadi8_flat  : FlatLoad <zextloadi8>;
 def sextloadi8_flat    : FlatLoad <sextloadi8>;
-def az_extloadi16_flat : FlatLoad <az_extloadi16>;
+def extloadi16_flat : FlatLoad <extloadi16>;
+def zextloadi16_flat : FlatLoad <zextloadi16>;
 def sextloadi16_flat   : FlatLoad <sextloadi16>;
 def atomic_load_flat   : FlatLoad<atomic_load>;
 
@@ -495,9 +480,11 @@ def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
 
 def constant_load : ConstantLoad<load>;
 def sextloadi8_constant : ConstantLoad <sextloadi8>;
-def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
+def extloadi8_constant : ConstantLoad <extloadi8>;
+def zextloadi8_constant : ConstantLoad <zextloadi8>;
 def sextloadi16_constant : ConstantLoad <sextloadi16>;
-def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
+def extloadi16_constant : ConstantLoad <extloadi16>;
+def zextloadi16_constant : ConstantLoad <zextloadi16>;
 
 
 class local_binary_atomic_op<SDNode atomic_op> :
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 7d9ca59c6d08a..4ff9aeb2e314e 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -851,9 +851,11 @@ defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
   "buffer_load_dwordx4", VReg_128, v4i32
 >;
 
-defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, az_extloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
-defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, az_extloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>;
@@ -1437,9 +1439,11 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins
 
 let SubtargetPredicate = isGFX6GFX7 in {
 def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
-def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, extloadi8_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, zextloadi8_constant>;
 def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
-def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, extloadi16_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, zextloadi16_constant>;
 
 defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, atomic_load_global>;
 defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, atomic_load_global>;
@@ -1458,9 +1462,11 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
 let OtherPredicates = [Has16BitInsts] in {
 
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_constant>;
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_global>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_global>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_global>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_global>;
 
 defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, load_global>;
 
@@ -1497,11 +1503,14 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
 }
 
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, az_extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, zextloadi8_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
-defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, az_extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, zextloadi16_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
 defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index fbe7d757f3513..0550092ce1d6e 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -30,10 +30,6 @@ class EGOrCaymanPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
   let SubtargetPredicate = isEGorCayman;
 }
 
-// FIXME: These are deprecated
-def az_extloadi8_local : LocalLoad <az_extloadi8>;
-def az_extloadi16_local : LocalLoad <az_extloadi16>;
-
 //===----------------------------------------------------------------------===//
 // Evergreen / Cayman store instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index df334790b858f..4070d94dd4ab2 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -767,11 +767,14 @@ class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType v
 
 let OtherPredicates = [HasFlatAddressSpace] in {
 
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
 def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
 def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
@@ -849,11 +852,14 @@ def atomic_pk_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd
 
 let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
 
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, az_extloadi8_global, i16>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
-def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, az_extloadi16_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
+def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
 
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index dcbedbd8cfd68..d3ce7ffd673c5 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -295,6 +295,32 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
   let VTXInst = 1;
 }
 
+// FIXME: Deprecated.
+class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
+                                              (ld_node node:$ptr), [{
+  LoadSDNode *L = cast<LoadSDNode>(N);
+  return L->getExtensionType() == ISD::ZEXTLOAD ||
+         L->getExtensionType() == ISD::EXTLOAD;
+}]>;
+
+def az_extload : AZExtLoadBase <unindexedload>;
+
+def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// FIXME: These are deprecated
+def az_extloadi8_local : LocalLoad <az_extloadi8>;
+def az_extloadi16_local : LocalLoad <az_extloadi16>;
+
 class LoadParamFrag <PatFrag load_type> : PatFrag <
   (ops node:$ptr), (load_type node:$ptr),
   [{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||

From e7e8789a632f3dcd029b0f78230e61773bdb3586 Mon Sep 17 00:00:00 2001
From: Ali Tamur <tamur@google.com>
Date: Tue, 16 Jul 2019 03:20:15 +0000
Subject: [PATCH 200/451] Revert "[OPENMP]Add support for analysis of if
 clauses."

This reverts commit rL366068.
The patch broke 86 tests under clang/test/OpenMP/ when run with address sanitizer.

llvm-svn: 366169
---
 clang/include/clang/AST/OpenMPClause.h        |   7 +-
 clang/lib/AST/OpenMPClause.cpp                |  19 -
 clang/test/Analysis/cfg-openmp.cpp            | 532 ++++++++----------
 clang/test/OpenMP/cancel_if_messages.cpp      |  10 -
 .../distribute_parallel_for_if_messages.cpp   |   7 -
 ...stribute_parallel_for_simd_if_messages.cpp |   7 -
 .../test/OpenMP/parallel_for_if_messages.cpp  |   7 -
 .../OpenMP/parallel_for_simd_if_messages.cpp  |   7 -
 clang/test/OpenMP/parallel_if_messages.cpp    |   7 -
 .../OpenMP/parallel_sections_if_messages.cpp  |   8 -
 clang/test/OpenMP/target_data_if_messages.cpp |   7 -
 .../OpenMP/target_enter_data_if_messages.cpp  |   7 -
 .../OpenMP/target_exit_data_if_messages.cpp   |   7 -
 clang/test/OpenMP/target_if_messages.cpp      |   7 -
 .../target_parallel_for_if_messages.cpp       |   7 -
 .../target_parallel_for_simd_if_messages.cpp  |   7 -
 .../OpenMP/target_parallel_if_messages.cpp    |   7 -
 clang/test/OpenMP/target_simd_if_messages.cpp |   7 -
 .../target_teams_distribute_if_messages.cpp   |   7 -
 ...ms_distribute_parallel_for_if_messages.cpp |   7 -
 ...stribute_parallel_for_simd_if_messages.cpp |   8 -
 ...rget_teams_distribute_simd_if_messages.cpp |   7 -
 .../test/OpenMP/target_teams_if_messages.cpp  |   7 -
 .../test/OpenMP/target_update_if_messages.cpp |   7 -
 clang/test/OpenMP/task_if_messages.cpp        |   7 -
 ...ms_distribute_parallel_for_if_messages.cpp |   8 -
 ...stribute_parallel_for_simd_if_messages.cpp |   8 -
 27 files changed, 239 insertions(+), 494 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index eadcc62a34575..c6daf73a623bd 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -501,10 +501,11 @@ class OMPIfClause : public OMPClause, public OMPClauseWithPreInit {
     return const_child_range(&Condition, &Condition + 1);
   }
 
-  child_range used_children();
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
   const_child_range used_children() const {
-    auto Children = const_cast<OMPIfClause *>(this)->used_children();
-    return const_child_range(Children.begin(), Children.end());
+    return const_child_range(const_child_iterator(), const_child_iterator());
   }
 
   static bool classof(const OMPClause *T) {
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 9d8a7ebc3023e..41520b380276c 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -209,25 +209,6 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C)
   return nullptr;
 }
 
-/// Gets the address of the original, non-captured, expression used in the
-/// clause as the preinitializer.
-static Stmt **getAddrOfExprAsWritten(Stmt *S) {
-  if (!S)
-    return nullptr;
-  if (auto *DS = dyn_cast<DeclStmt>(S)) {
-    assert(DS->isSingleDecl() && "Only single expression must be captured.");
-    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(DS->getSingleDecl()))
-      return OED->getInitAddress();
-  }
-  return nullptr;
-}
-
-OMPClause::child_range OMPIfClause::used_children() {
-  if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt()))
-    return child_range(C, C + 1);
-  return child_range(&Condition, &Condition + 1);
-}
-
 OMPOrderedClause *OMPOrderedClause::Create(const ASTContext &C, Expr *Num,
                                            unsigned NumLoops,
                                            SourceLocation StartLoc,
diff --git a/clang/test/Analysis/cfg-openmp.cpp b/clang/test/Analysis/cfg-openmp.cpp
index b608606a83f8c..2f734d14b0216 100644
--- a/clang/test/Analysis/cfg-openmp.cpp
+++ b/clang/test/Analysis/cfg-openmp.cpp
@@ -1,402 +1,340 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp -fopenmp-version=45 | FileCheck %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp | FileCheck %s
 
 // CHECK-LABEL:  void xxx(int argc)
 void xxx(int argc) {
 // CHECK:        [B1]
 // CHECK-NEXT:   1: int x;
-// CHECK-NEXT:   2: int cond;
-  int x, cond;
-// CHECK-NEXT:   [[#ATOM:]]: x
-// CHECK-NEXT:   [[#ATOM+1]]: [B1.[[#ATOM]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:   [[#ATOM+2]]: argc
-// CHECK-NEXT:   [[#ATOM+3]]: [B1.[[#ATOM+2]]] = [B1.[[#ATOM+1]]]
-// CHECK-NEXT:   [[#ATOM+4]]: #pragma omp atomic read
-// CHECK-NEXT:   [B1.[[#ATOM+3]]];
+  int x;
+// CHECK-NEXT:   2: x
+// CHECK-NEXT:   3: [B1.2] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:   4: argc
+// CHECK-NEXT:   5: [B1.4] = [B1.3]
+// CHECK-NEXT:   6: #pragma omp atomic read
+// CHECK-NEXT:    [B1.5];
 #pragma omp atomic read
   argc = x;
-// CHECK-NEXT:   [[#CRIT:]]: x
-// CHECK-NEXT:   [[#CRIT+1]]: [B1.[[#CRIT]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:   [[#CRIT+2]]: argc
-// CHECK-NEXT:   [[#CRIT+3]]: [B1.[[#CRIT+2]]] = [B1.[[#CRIT+1]]]
-// CHECK-NEXT:   [[#CRIT+4]]: #pragma omp critical
-// CHECK-NEXT:   [B1.[[#CRIT+3]]];
+// CHECK-NEXT:   7: x
+// CHECK-NEXT:   8: [B1.7] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:   9: argc
+// CHECK-NEXT:  10: [B1.9] = [B1.8]
+// CHECK-NEXT:  11: #pragma omp critical
+// CHECK-NEXT:    [B1.10];
 #pragma omp critical
   argc = x;
-// CHECK-NEXT:  [[#DPF:]]: x
-// CHECK-NEXT:  [[#DPF+1]]: [B1.[[#DPF]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#DPF+2]]: argc
-// CHECK-NEXT:  [[#DPF+3]]: [B1.[[#DPF+2]]] = [B1.[[#DPF+1]]]
-// CHECK-NEXT:  [[#DPF+4]]: cond
-// CHECK-NEXT:  [[#DPF+5]]: [B1.[[#DPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#DPF+6]]: [B1.[[#DPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#DPF+7]]: #pragma omp distribute parallel for if(parallel: cond)
+// CHECK-NEXT:  12: x
+// CHECK-NEXT:  13: [B1.12] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  14: argc
+// CHECK-NEXT:  15: [B1.14] = [B1.13]
+// CHECK-NEXT:  16: #pragma omp distribute parallel for
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#DPF+3]]];
-#pragma omp distribute parallel for if(parallel:cond)
+// CHECK-NEXT:        [B1.15];
+#pragma omp distribute parallel for
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#DPFS:]]: x
-// CHECK-NEXT:  [[#DPFS+1]]: [B1.[[#DPFS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#DPFS+2]]: argc
-// CHECK-NEXT:  [[#DPFS+3]]: [B1.[[#DPFS+2]]] = [B1.[[#DPFS+1]]]
-// CHECK-NEXT:  [[#DPFS+4]]: cond
-// CHECK-NEXT:  [[#DPFS+5]]: [B1.[[#DPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#DPFS+6]]: [B1.[[#DPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#DPFS+7]]: #pragma omp distribute parallel for simd if(cond)
+// CHECK-NEXT:  17: x
+// CHECK-NEXT:  18: [B1.17] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  19: argc
+// CHECK-NEXT:  20: [B1.19] = [B1.18]
+// CHECK-NEXT:  21: #pragma omp distribute parallel for simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#DPFS+3]]];
-#pragma omp distribute parallel for simd if(cond)
+// CHECK-NEXT:        [B1.20];
+#pragma omp distribute parallel for simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#DS:]]: x
-// CHECK-NEXT:  [[#DS+1]]: [B1.[[#DS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#DS+2]]: argc
-// CHECK-NEXT:  [[#DS+3]]: [B1.[[#DS+2]]] = [B1.[[#DS+1]]]
-// CHECK-NEXT:  [[#DS+4]]: #pragma omp distribute simd
+// CHECK-NEXT:  22: x
+// CHECK-NEXT:  23: [B1.22] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  24: argc
+// CHECK-NEXT:  25: [B1.24] = [B1.23]
+// CHECK-NEXT:  26: #pragma omp distribute simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#DS+3]]];
+// CHECK-NEXT:        [B1.25];
 #pragma omp distribute simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#FOR:]]: x
-// CHECK-NEXT:  [[#FOR+1]]: [B1.[[#FOR]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#FOR+2]]: argc
-// CHECK-NEXT:  [[#FOR+3]]: [B1.[[#FOR+2]]] = [B1.[[#FOR+1]]]
-// CHECK-NEXT:  [[#FOR+4]]: #pragma omp for
+// CHECK-NEXT:  27: x
+// CHECK-NEXT:  28: [B1.27] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  29: argc
+// CHECK-NEXT:  30: [B1.29] = [B1.28]
+// CHECK-NEXT:  31: #pragma omp for
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#FOR+3]]];
+// CHECK-NEXT:        [B1.30];
 #pragma omp for
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#FS:]]: x
-// CHECK-NEXT:  [[#FS+1]]: [B1.[[#FS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#FS+2]]: argc
-// CHECK-NEXT:  [[#FS+3]]: [B1.[[#FS+2]]] = [B1.[[#FS+1]]]
-// CHECK-NEXT:  [[#FS+4]]: #pragma omp for simd
+// CHECK-NEXT:  32: x
+// CHECK-NEXT:  33: [B1.32] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  34: argc
+// CHECK-NEXT:  35: [B1.34] = [B1.33]
+// CHECK-NEXT:  36: #pragma omp for simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#FS+3]]];
+// CHECK-NEXT:        [B1.35];
 #pragma omp for simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#MASTER:]]: x
-// CHECK-NEXT:  [[#MASTER+1]]: [B1.[[#MASTER]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#MASTER+2]]: argc
-// CHECK-NEXT:  [[#MASTER+3]]: [B1.[[#MASTER+2]]] = [B1.[[#MASTER+1]]]
-// CHECK-NEXT:  [[#MASTER+4]]: #pragma omp master
-// CHECK-NEXT:    [B1.[[#MASTER+3]]];
+// CHECK-NEXT:  37: x
+// CHECK-NEXT:  38: [B1.37] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  39: argc
+// CHECK-NEXT:  40: [B1.39] = [B1.38]
+// CHECK-NEXT:  41: #pragma omp master
+// CHECK-NEXT:    [B1.40];
 #pragma omp master
   argc = x;
-// CHECK-NEXT:  [[#ORD:]]: x
-// CHECK-NEXT:  [[#ORD+1]]: [B1.[[#ORD]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#ORD+2]]: argc
-// CHECK-NEXT:  [[#ORD+3]]: [B1.[[#ORD+2]]] = [B1.[[#ORD+1]]]
-// CHECK-NEXT:  [[#ORD+4]]: #pragma omp ordered
-// CHECK-NEXT:    [B1.[[#ORD+3]]];
-// CHECK-NEXT:  [[#ORD+5]]: #pragma omp for ordered
+// CHECK-NEXT:  42: x
+// CHECK-NEXT:  43: [B1.42] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  44: argc
+// CHECK-NEXT:  45: [B1.44] = [B1.43]
+// CHECK-NEXT:  46: #pragma omp ordered
+// CHECK-NEXT:    [B1.45];
+// CHECK-NEXT:  47: #pragma omp for ordered
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i) {
-// CHECK-NEXT:[B1.[[#ORD+4]]]    }
+// CHECK-NEXT:[B1.46]    }
 #pragma omp for ordered
   for (int i = 0; i < 10; ++i) {
 #pragma omp ordered
     argc = x;
   }
-// CHECK-NEXT:  [[#PF:]]: x
-// CHECK-NEXT:  [[#PF+1]]: [B1.[[#PF]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#PF+2]]: argc
-// CHECK-NEXT:  [[#PF+3]]: [B1.[[#PF+2]]] = [B1.[[#PF+1]]]
-// CHECK-NEXT:  [[#PF+4]]: cond
-// CHECK-NEXT:  [[#PF+5]]: [B1.[[#PF+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#PF+6]]: [B1.[[#PF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#PF+7]]: #pragma omp parallel for if(cond)
+// CHECK-NEXT:  48: x
+// CHECK-NEXT:  49: [B1.48] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  50: argc
+// CHECK-NEXT:  51: [B1.50] = [B1.49]
+// CHECK-NEXT:  52: #pragma omp parallel for
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#PF+3]]];
-#pragma omp parallel for if(cond)
+// CHECK-NEXT:        [B1.51];
+#pragma omp parallel for
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#PFS:]]: x
-// CHECK-NEXT:  [[#PFS+1]]: [B1.[[#PFS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#PFS+2]]: argc
-// CHECK-NEXT:  [[#PFS+3]]: [B1.[[#PFS+2]]] = [B1.[[#PFS+1]]]
-// CHECK-NEXT:  [[#PFS+4]]: cond
-// CHECK-NEXT:  [[#PFS+5]]: [B1.[[#PFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#PFS+6]]: [B1.[[#PFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#PFS+7]]: #pragma omp parallel for simd if(cond)
+// CHECK-NEXT:  53: x
+// CHECK-NEXT:  54: [B1.53] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  55: argc
+// CHECK-NEXT:  56: [B1.55] = [B1.54]
+// CHECK-NEXT:  57: #pragma omp parallel for simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#PFS+3]]];
-#pragma omp parallel for simd if(cond)
+// CHECK-NEXT:        [B1.56];
+#pragma omp parallel for simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#PAR:]]: x
-// CHECK-NEXT:  [[#PAR+1]]: [B1.[[#PAR]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#PAR+2]]: argc
-// CHECK-NEXT:  [[#PAR+3]]: [B1.[[#PAR+2]]] = [B1.[[#PAR+1]]]
-// CHECK-NEXT:  [[#PAR+4]]: cond
-// CHECK-NEXT:  [[#PAR+5]]: [B1.[[#PAR+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#PAR+6]]: [B1.[[#PAR+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#PAR+7]]: #pragma omp parallel if(cond)
-// CHECK-NEXT:    [B1.[[#PAR+3]]];
-#pragma omp parallel if(cond)
+// CHECK-NEXT:  58: x
+// CHECK-NEXT:  59: [B1.58] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  60: argc
+// CHECK-NEXT:  61: [B1.60] = [B1.59]
+// CHECK-NEXT:  62: #pragma omp parallel
+// CHECK-NEXT:    [B1.61];
+#pragma omp parallel
   argc = x;
-// CHECK-NEXT:  [[#PSECT:]]: x
-// CHECK-NEXT:  [[#PSECT+1]]: [B1.[[#PSECT]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#PSECT+2]]: argc
-// CHECK-NEXT:  [[#PSECT+3]]: [B1.[[#PSECT+2]]] = [B1.[[#PSECT+1]]]
-// CHECK-NEXT:  [[#PSECT+4]]: cond
-// CHECK-NEXT:  [[#PSECT+5]]: [B1.[[#PSECT+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#PSECT+6]]: [B1.[[#PSECT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#PSECT+7]]: #pragma omp parallel sections if(cond)
+// CHECK-NEXT:  63: x
+// CHECK-NEXT:  64: [B1.63] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  65: argc
+// CHECK-NEXT:  66: [B1.65] = [B1.64]
+// CHECK-NEXT:  67: #pragma omp parallel sections
 // CHECK-NEXT:    {
-// CHECK-NEXT:        [B1.[[#PSECT+3]]];
+// CHECK-NEXT:        [B1.66];
 // CHECK-NEXT:    }
-#pragma omp parallel sections if(cond)
+#pragma omp parallel sections
   {
     argc = x;
   }
-// CHECK-NEXT:  [[#SIMD:]]: x
-// CHECK-NEXT:  [[#SIMD+1]]: [B1.[[#SIMD]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#SIMD+2]]: argc
-// CHECK-NEXT:  [[#SIMD+3]]: [B1.[[#SIMD+2]]] = [B1.[[#SIMD+1]]]
-// CHECK-NEXT:  [[#SIMD+4]]: #pragma omp simd
+// CHECK-NEXT:  68: x
+// CHECK-NEXT:  69: [B1.68] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  70: argc
+// CHECK-NEXT:  71: [B1.70] = [B1.69]
+// CHECK-NEXT:  72: #pragma omp simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#SIMD+3]]];
+// CHECK-NEXT:        [B1.71];
 #pragma omp simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#SINGLE:]]: x
-// CHECK-NEXT:  [[#SINGLE+1]]: [B1.[[#SINGLE]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#SINGLE+2]]: argc
-// CHECK-NEXT:  [[#SINGLE+3]]: [B1.[[#SINGLE+2]]] = [B1.[[#SINGLE+1]]]
-// CHECK-NEXT:  [[#SINGLE+4]]: #pragma omp single
-// CHECK-NEXT:    [B1.[[#SINGLE+3]]];
+// CHECK-NEXT:  73: x
+// CHECK-NEXT:  74: [B1.73] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  75: argc
+// CHECK-NEXT:  76: [B1.75] = [B1.74]
+// CHECK-NEXT:  77: #pragma omp single
+// CHECK-NEXT:    [B1.76];
 #pragma omp single
   argc = x;
-// CHECK-NEXT:  [[#TARGET:]]: x
-// CHECK-NEXT:  [[#TARGET+1]]: [B1.[[#TARGET]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TARGET+2]]: argc
-// CHECK-NEXT:  [[#TARGET+3]]: [B1.[[#TARGET+2]]] = [B1.[[#TARGET+1]]]
-// CHECK-NEXT:  [[#TARGET+4]]: cond
-// CHECK-NEXT:  [[#TARGET+5]]: [B1.[[#TARGET+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TARGET+6]]: [B1.[[#TARGET+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TARGET+7]]: #pragma omp target depend(in : argc) if(cond)
-// CHECK-NEXT:    [B1.[[#TARGET+3]]];
+// CHECK-NEXT:  78: x
+// CHECK-NEXT:  79: [B1.78] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  80: argc
+// CHECK-NEXT:  81: [B1.80] = [B1.79]
+// CHECK-NEXT:  82: #pragma omp target depend(in : argc)
+// CHECK-NEXT:    [B1.81];
 #pragma omp target depend(in \
-                          : argc) if(cond)
+                          : argc)
   argc = x;
-// CHECK-NEXT:  [[#TPF:]]: x
-// CHECK-NEXT:  [[#TPF+1]]: [B1.[[#TPF]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TPF+2]]: argc
-// CHECK-NEXT:  [[#TPF+3]]: [B1.[[#TPF+2]]] = [B1.[[#TPF+1]]]
-// CHECK-NEXT:  [[#TPF+4]]: cond
-// CHECK-NEXT:  [[#TPF+5]]: [B1.[[#TPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TPF+6]]: [B1.[[#TPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TPF+7]]: #pragma omp target parallel for if(parallel: cond)
+// CHECK-NEXT:  83: x
+// CHECK-NEXT:  84: [B1.83] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  85: argc
+// CHECK-NEXT:  86: [B1.85] = [B1.84]
+// CHECK-NEXT:  87: #pragma omp target parallel for
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TPF+3]]];
-#pragma omp target parallel for if(parallel:cond)
+// CHECK-NEXT:        [B1.86];
+#pragma omp target parallel for
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TPFS:]]: x
-// CHECK-NEXT:  [[#TPFS+1]]: [B1.[[#TPFS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TPFS+2]]: argc
-// CHECK-NEXT:  [[#TPFS+3]]: [B1.[[#TPFS+2]]] = [B1.[[#TPFS+1]]]
-// CHECK-NEXT:  [[#TPFS+4]]: cond
-// CHECK-NEXT:  [[#TPFS+5]]: [B1.[[#TPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TPFS+6]]: [B1.[[#TPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TPFS+7]]: #pragma omp target parallel for simd if(target: cond)
+// CHECK-NEXT:  88: x
+// CHECK-NEXT:  89: [B1.88] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  90: argc
+// CHECK-NEXT:  91: [B1.90] = [B1.89]
+// CHECK-NEXT:  92: #pragma omp target parallel for simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TPFS+3]]];
-#pragma omp target parallel for simd if(target:cond)
+// CHECK-NEXT:        [B1.91];
+#pragma omp target parallel for simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TP:]]: x
-// CHECK-NEXT:  [[#TP+1]]: [B1.[[#TP]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TP+2]]: argc
-// CHECK-NEXT:  [[#TP+3]]: [B1.[[#TP+2]]] = [B1.[[#TP+1]]]
-// CHECK-NEXT:  [[#TP+4]]: cond
-// CHECK-NEXT:  [[#TP+5]]: [B1.[[#TP+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TP+6]]: [B1.[[#TP+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TP+7]]: #pragma omp target parallel if(cond)
-// CHECK-NEXT:    [B1.[[#TP+3]]];
-#pragma omp target parallel if(cond)
+// CHECK-NEXT:  93: x
+// CHECK-NEXT:  94: [B1.93] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  95: argc
+// CHECK-NEXT:  96: [B1.95] = [B1.94]
+// CHECK-NEXT:  97: #pragma omp target parallel
+// CHECK-NEXT:    [B1.96];
+#pragma omp target parallel
   argc = x;
-// CHECK-NEXT:  [[#TSIMD:]]: x
-// CHECK-NEXT:  [[#TSIMD+1]]: [B1.[[#TSIMD]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TSIMD+2]]: argc
-// CHECK-NEXT:  [[#TSIMD+3]]: [B1.[[#TSIMD+2]]] = [B1.[[#TSIMD+1]]]
-// CHECK-NEXT:  [[#TSIMD+4]]: cond
-// CHECK-NEXT:  [[#TSIMD+5]]: [B1.[[#TSIMD+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TSIMD+6]]: [B1.[[#TSIMD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TSIMD+7]]: #pragma omp target simd if(cond)
+// CHECK-NEXT:  98: x
+// CHECK-NEXT:  99: [B1.98] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 100: argc
+// CHECK-NEXT: 101: [B1.100] = [B1.99]
+// CHECK-NEXT: 102: #pragma omp target simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TSIMD+3]]];
-#pragma omp target simd if(cond)
+// CHECK-NEXT:        [B1.101];
+#pragma omp target simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TTD:]]: x
-// CHECK-NEXT:  [[#TTD+1]]: [B1.[[#TTD]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TTD+2]]: argc
-// CHECK-NEXT:  [[#TTD+3]]: [B1.[[#TTD+2]]] = [B1.[[#TTD+1]]]
-// CHECK-NEXT:  [[#TTD+4]]: cond
-// CHECK-NEXT:  [[#TTD+5]]: [B1.[[#TTD+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TTD+6]]: [B1.[[#TTD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TTD+7]]: #pragma omp target teams distribute if(cond)
+// CHECK-NEXT: 103: x
+// CHECK-NEXT: 104: [B1.103] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 105: argc
+// CHECK-NEXT: 106: [B1.105] = [B1.104]
+// CHECK-NEXT: 107: #pragma omp target teams distribute
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TTD+3]]];
-#pragma omp target teams distribute if(cond)
+// CHECK-NEXT:        [B1.106];
+#pragma omp target teams distribute
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TTDPF:]]: x
-// CHECK-NEXT:  [[#TTDPF+1]]: [B1.[[#TTDPF]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TTDPF+2]]: argc
-// CHECK-NEXT:  [[#TTDPF+3]]: [B1.[[#TTDPF+2]]] = [B1.[[#TTDPF+1]]]
-// CHECK-NEXT:  [[#TTDPF+4]]: cond
-// CHECK-NEXT:  [[#TTDPF+5]]: [B1.[[#TTDPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TTDPF+6]]: [B1.[[#TTDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TTDPF+7]]: #pragma omp target teams distribute parallel for if(cond)
+// CHECK-NEXT: 108: x
+// CHECK-NEXT: 109: [B1.108] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 110: argc
+// CHECK-NEXT: 111: [B1.110] = [B1.109]
+// CHECK-NEXT: 112: #pragma omp target teams distribute parallel for
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TTDPF+3]]];
-#pragma omp target teams distribute parallel for if(cond)
+// CHECK-NEXT:        [B1.111];
+#pragma omp target teams distribute parallel for
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TTDPFS:]]: x
-// CHECK-NEXT:  [[#TTDPFS+1]]: [B1.[[#TTDPFS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TTDPFS+2]]: argc
-// CHECK-NEXT:  [[#TTDPFS+3]]: [B1.[[#TTDPFS+2]]] = [B1.[[#TTDPFS+1]]]
-// CHECK-NEXT:  [[#TTDPFS+4]]: cond
-// CHECK-NEXT:  [[#TTDPFS+5]]: [B1.[[#TTDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TTDPFS+6]]: [B1.[[#TTDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TTDPFS+7]]: #pragma omp target teams distribute parallel for simd if(parallel: cond)
+// CHECK-NEXT: 113: x
+// CHECK-NEXT: 114: [B1.113] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 115: argc
+// CHECK-NEXT: 116: [B1.115] = [B1.114]
+// CHECK-NEXT: 117: #pragma omp target teams distribute parallel for simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TTDPFS+3]]];
-#pragma omp target teams distribute parallel for simd if(parallel:cond)
+// CHECK-NEXT:        [B1.116];
+#pragma omp target teams distribute parallel for simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TTDS:]]: x
-// CHECK-NEXT:  [[#TTDS+1]]: [B1.[[#TTDS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TTDS+2]]: argc
-// CHECK-NEXT:  [[#TTDS+3]]: [B1.[[#TTDS+2]]] = [B1.[[#TTDS+1]]]
-// CHECK-NEXT:  [[#TTDS+4]]: cond
-// CHECK-NEXT:  [[#TTDS+5]]: [B1.[[#TTDS+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TTDS+6]]: [B1.[[#TTDS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TTDS+7]]: #pragma omp target teams distribute simd if(cond)
+// CHECK-NEXT: 118: x
+// CHECK-NEXT: 119: [B1.118] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 120: argc
+// CHECK-NEXT: 121: [B1.120] = [B1.119]
+// CHECK-NEXT: 122: #pragma omp target teams distribute simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TTDS+3]]];
-#pragma omp target teams distribute simd if(cond)
+// CHECK-NEXT:        [B1.121];
+#pragma omp target teams distribute simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TT:]]: x
-// CHECK-NEXT:  [[#TT+1]]: [B1.[[#TT]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TT+2]]: argc
-// CHECK-NEXT:  [[#TT+3]]: [B1.[[#TT+2]]] = [B1.[[#TT+1]]]
-// CHECK-NEXT:  [[#TT+4]]: cond
-// CHECK-NEXT:  [[#TT+5]]: [B1.[[#TT+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TT+6]]: [B1.[[#TT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TT+7]]: #pragma omp target teams if(cond)
-// CHECK-NEXT:    [B1.[[#TT+3]]];
-#pragma omp target teams if(cond)
+// CHECK-NEXT: 123: x
+// CHECK-NEXT: 124: [B1.123] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 125: argc
+// CHECK-NEXT: 126: [B1.125] = [B1.124]
+// CHECK-NEXT: 127: #pragma omp target teams
+// CHECK-NEXT:    [B1.126];
+#pragma omp target teams
   argc = x;
-// CHECK-NEXT: [[#TU:]]: cond
-// CHECK-NEXT: [[#TU+1]]: [B1.[[#TU]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: [[#TU+2]]: [B1.[[#TU+1]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT: [[#TU+3]]: #pragma omp target update to(x) if(target update: cond)
-#pragma omp target update to(x) if(target update:cond)
-// CHECK-NEXT:  [[#TASK:]]: x
-// CHECK-NEXT:  [[#TASK+1]]: [B1.[[#TASK]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TASK+2]]: argc
-// CHECK-NEXT:  [[#TASK+3]]: [B1.[[#TASK+2]]] = [B1.[[#TASK+1]]]
-// CHECK-NEXT:  [[#TASK+4]]: cond
-// CHECK-NEXT:  [[#TASK+5]]: [B1.[[#TASK+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TASK+6]]: [B1.[[#TASK+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TASK+7]]: #pragma omp task if(cond)
-// CHECK-NEXT:    [B1.[[#TASK+3]]];
-#pragma omp task if(cond)
+// CHECK-NEXT: 128: #pragma omp target update to(x)
+#pragma omp target update to(x)
+// CHECK-NEXT: 129: x
+// CHECK-NEXT: 130: [B1.129] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 131: argc
+// CHECK-NEXT: 132: [B1.131] = [B1.130]
   argc = x;
-// CHECK-NEXT:  [[#TG:]]: x
-// CHECK-NEXT:  [[#TG+1]]: [B1.[[#TG]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TG+2]]: argc
-// CHECK-NEXT:  [[#TG+3]]: [B1.[[#TG+2]]] = [B1.[[#TG+1]]]
-// CHECK-NEXT:  [[#TG+4]]: #pragma omp taskgroup
-// CHECK-NEXT:    [B1.[[#TG+3]]];
+// CHECK-NEXT: 133: x
+// CHECK-NEXT: 134: [B1.133] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 135: argc
+// CHECK-NEXT: 136: [B1.135] = [B1.134]
+// CHECK-NEXT: 137: #pragma omp task
+// CHECK-NEXT:    [B1.136];
+#pragma omp task
+  argc = x;
+// CHECK-NEXT: 138: x
+// CHECK-NEXT: 139: [B1.138] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 140: argc
+// CHECK-NEXT: 141: [B1.140] = [B1.139]
+// CHECK-NEXT: 142: #pragma omp taskgroup
+// CHECK-NEXT:    [B1.141];
 #pragma omp taskgroup
   argc = x;
-// CHECK-NEXT:  [[#TL:]]: x
-// CHECK-NEXT:  [[#TL+1]]: [B1.[[#TL]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TL+2]]: argc
-// CHECK-NEXT:  [[#TL+3]]: [B1.[[#TL+2]]] = [B1.[[#TL+1]]]
-// CHECK-NEXT:  [[#TL+4]]: cond
-// CHECK-NEXT:  [[#TL+5]]: [B1.[[#TL+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TL+6]]: [B1.[[#TL+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TL+7]]: #pragma omp taskloop if(cond)
+// CHECK-NEXT: 143: x
+// CHECK-NEXT: 144: [B1.143] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 145: argc
+// CHECK-NEXT: 146: [B1.145] = [B1.144]
+// CHECK-NEXT: 147: #pragma omp taskloop
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TL+3]]];
-#pragma omp taskloop if(cond)
+// CHECK-NEXT:        [B1.146];
+#pragma omp taskloop
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TLS:]]: x
-// CHECK-NEXT:  [[#TLS+1]]: [B1.[[#TLS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TLS+2]]: argc
-// CHECK-NEXT:  [[#TLS+3]]: [B1.[[#TLS+2]]] = [B1.[[#TLS+1]]]
-// CHECK-NEXT:  [[#TLS+4]]: cond
-// CHECK-NEXT:  [[#TLS+5]]: [B1.[[#TLS+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TLS+6]]: [B1.[[#TLS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TLS+7]]: #pragma omp taskloop simd if(cond)
+// CHECK-NEXT: 148: x
+// CHECK-NEXT: 149: [B1.148] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 150: argc
+// CHECK-NEXT: 151: [B1.150] = [B1.149]
+// CHECK-NEXT: 152: #pragma omp taskloop simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TLS+3]]];
-#pragma omp taskloop simd if(cond)
+// CHECK-NEXT:        [B1.151];
+#pragma omp taskloop simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [[#TDPF:]]: x
-// CHECK-NEXT:  [[#TDPF+1]]: [B1.[[#TDPF]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TDPF+2]]: argc
-// CHECK-NEXT:  [[#TDPF+3]]: [B1.[[#TDPF+2]]] = [B1.[[#TDPF+1]]]
-// CHECK-NEXT:  [[#TDPF+4]]: cond
-// CHECK-NEXT:  [[#TDPF+5]]: [B1.[[#TDPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TDPF+6]]: [B1.[[#TDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TDPF+7]]: #pragma omp teams distribute parallel for if(cond)
+// CHECK-NEXT: 153: x
+// CHECK-NEXT: 154: [B1.153] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 155: argc
+// CHECK-NEXT: 156: [B1.155] = [B1.154]
+// CHECK-NEXT: 157: #pragma omp teams distribute parallel for
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TDPF+3]]];
-// CHECK-NEXT:  [[#TDPF+8]]: #pragma omp target
+// CHECK-NEXT:        [B1.156];
+// CHECK-NEXT: 158: #pragma omp target
 #pragma omp target
-#pragma omp teams distribute parallel for if(cond)
+#pragma omp teams distribute parallel for
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [B1.[[#TDPF+7]]] [[#TDPFS:]]: x
-// CHECK-NEXT:  [[#TDPFS+1]]: [B1.[[#TDPFS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TDPFS+2]]: argc
-// CHECK-NEXT:  [[#TDPFS+3]]: [B1.[[#TDPFS+2]]] = [B1.[[#TDPFS+1]]]
-// CHECK-NEXT:  [[#TDPFS+4]]: cond
-// CHECK-NEXT:  [[#TDPFS+5]]: [B1.[[#TDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TDPFS+6]]: [B1.[[#TDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
-// CHECK-NEXT:  [[#TDPFS+7]]: #pragma omp teams distribute parallel for simd
+// CHECK-NEXT:[B1.157] 159: x
+// CHECK-NEXT: 160: [B1.159] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 161: argc
+// CHECK-NEXT: 162: [B1.161] = [B1.160]
+// CHECK-NEXT: 163: #pragma omp teams distribute parallel for simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TDPFS+3]]];
-// CHECK-NEXT:  [[#TDPFS+8]]: #pragma omp target
+// CHECK-NEXT:        [B1.162];
+// CHECK-NEXT: 164: #pragma omp target
 #pragma omp target
-#pragma omp teams distribute parallel for simd if(cond)
+#pragma omp teams distribute parallel for simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [B1.[[#TDPFS+7]]] [[#TDS:]]: x
-// CHECK-NEXT:  [[#TDS+1]]: [B1.[[#TDS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TDS+2]]: argc
-// CHECK-NEXT:  [[#TDS+3]]: [B1.[[#TDS+2]]] = [B1.[[#TDS+1]]]
-// CHECK-NEXT:  [[#TDS+4]]: #pragma omp teams distribute simd
+// CHECK-NEXT:[B1.163] 165: x
+// CHECK-NEXT: 166: [B1.165] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 167: argc
+// CHECK-NEXT: 168: [B1.167] = [B1.166]
+// CHECK-NEXT: 169: #pragma omp teams distribute simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.[[#TDS+3]]];
-// CHECK-NEXT:  [[#TDS+5]]: #pragma omp target
+// CHECK-NEXT:        [B1.168];
+// CHECK-NEXT: 170: #pragma omp target
 #pragma omp target
 #pragma omp teams distribute simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  [B1.[[#TDS+4]]] [[#TEAMS:]]: x
-// CHECK-NEXT:  [[#TEAMS+1]]: [B1.[[#TEAMS]]] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  [[#TEAMS+2]]: argc
-// CHECK-NEXT:  [[#TEAMS+3]]: [B1.[[#TEAMS+2]]] = [B1.[[#TEAMS+1]]]
-// CHECK-NEXT:  [[#TEAMS+4]]: #pragma omp teams
-// CHECK-NEXT:    [B1.[[#TEAMS+3]]];
-// CHECK-NEXT:  [[#TEAMS+5]]: #pragma omp target
+// CHECK-NEXT:[B1.169] 171: x
+// CHECK-NEXT: 172: [B1.171] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: 173: argc
+// CHECK-NEXT: 174: [B1.173] = [B1.172]
+// CHECK-NEXT: 175: #pragma omp teams
+// CHECK-NEXT:    [B1.174];
+// CHECK-NEXT: 176: #pragma omp target
 #pragma omp target
 #pragma omp teams
   argc = x;
-// CHECK-NEXT:  [B1.[[#TEAMS+4]]]   Preds
+// CHECK-NEXT:[B1.175]   Preds
 }
 
diff --git a/clang/test/OpenMP/cancel_if_messages.cpp b/clang/test/OpenMP/cancel_if_messages.cpp
index 222087ca9e61b..3d629c927e907 100644
--- a/clang/test/OpenMP/cancel_if_messages.cpp
+++ b/clang/test/OpenMP/cancel_if_messages.cpp
@@ -9,16 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp parallel
-  {
-#pragma omp cancel parallel if (cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-    for (int i = 0; i < 10; ++i)
-      ;
-  }
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
index e628a15c3ab44..a06ff2377c043 100644
--- a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
index 6cf18faf0a87f..7769272026e6b 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_for_if_messages.cpp b/clang/test/OpenMP/parallel_for_if_messages.cpp
index 56bb06be0cc71..32f9ef3a7defa 100644
--- a/clang/test/OpenMP/parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
index bab9339d49174..aa1e302d04242 100644
--- a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_if_messages.cpp b/clang/test/OpenMP/parallel_if_messages.cpp
index f095e66bbfa5e..7f802a9e4236a 100644
--- a/clang/test/OpenMP/parallel_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_sections_if_messages.cpp b/clang/test/OpenMP/parallel_sections_if_messages.cpp
index b7c92df4f30df..8d36b6d5d3086 100644
--- a/clang/test/OpenMP/parallel_sections_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_sections_if_messages.cpp
@@ -9,14 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp parallel sections if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  {
-    ;
-  }
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_data_if_messages.cpp b/clang/test/OpenMP/target_data_if_messages.cpp
index 29f898c6d9fa7..c6f9b4b34eeea 100644
--- a/clang/test/OpenMP/target_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_data_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target data map(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_enter_data_if_messages.cpp b/clang/test/OpenMP/target_enter_data_if_messages.cpp
index 21019e9ae7f8c..5123d607dc6a1 100644
--- a/clang/test/OpenMP/target_enter_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_enter_data_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target enter data map(to:argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_exit_data_if_messages.cpp b/clang/test/OpenMP/target_exit_data_if_messages.cpp
index 7b2385c16cd21..c45b32ff3fe75 100644
--- a/clang/test/OpenMP/target_exit_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_exit_data_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target exit data map(from: argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_if_messages.cpp b/clang/test/OpenMP/target_if_messages.cpp
index f381e9eb91ebd..e6b667f2cffbf 100644
--- a/clang/test/OpenMP/target_if_messages.cpp
+++ b/clang/test/OpenMP/target_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_if_messages.cpp
index a5a181b9d273a..445dc1775b0f2 100644
--- a/clang/test/OpenMP/target_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
index ef9a2089d1087..b0da8017019f0 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target parallel for simd if(parallel: cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_if_messages.cpp b/clang/test/OpenMP/target_parallel_if_messages.cpp
index ac498a7108b0d..460e0c8655f09 100644
--- a/clang/test/OpenMP/target_parallel_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_simd_if_messages.cpp b/clang/test/OpenMP/target_simd_if_messages.cpp
index 5f3e9e3910ac6..94d2ab308daa2 100644
--- a/clang/test/OpenMP/target_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_simd_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
index 499cd3ac58050..fd1ffb08cbe8c 100644
--- a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target teams distribute if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
index 6df23076472ec..e1114028b6877 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
index e88c1f1dbbfff..59c75893a1714 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
@@ -9,14 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target teams distribute parallel for simd if (parallel \
-                                                          : cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
index 53af6e759d21e..7134a8394cbb8 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target teams distribute simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_if_messages.cpp b/clang/test/OpenMP/target_teams_if_messages.cpp
index 4bc82a349398d..8d3d690d631fa 100644
--- a/clang/test/OpenMP/target_teams_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target teams if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_update_if_messages.cpp b/clang/test/OpenMP/target_update_if_messages.cpp
index d967713e456fb..9ded332b04eb9 100644
--- a/clang/test/OpenMP/target_update_if_messages.cpp
+++ b/clang/test/OpenMP/target_update_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target update to(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/task_if_messages.cpp b/clang/test/OpenMP/task_if_messages.cpp
index 2d47b32b9a153..305af22149d85 100644
--- a/clang/test/OpenMP/task_if_messages.cpp
+++ b/clang/test/OpenMP/task_if_messages.cpp
@@ -9,13 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp task if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
index b76599d41a46a..6f724b050178a 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
@@ -9,14 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target
-#pragma omp teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
index 39a0b326383a2..c01e6e87e39a5 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
@@ -9,14 +9,6 @@ bool foobool(int argc) {
   return argc;
 }
 
-void xxx(int argc) {
-  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
-#pragma omp target
-#pragma omp teams distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
-  for (int i = 0; i < 10; ++i)
-    ;
-}
-
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}

From a17b1aed6ab205515adc31d19e953635e563e5c4 Mon Sep 17 00:00:00 2001
From: Zoe Carver <z.zoelec2@gmail.com>
Date: Tue, 16 Jul 2019 03:21:01 +0000
Subject: [PATCH 201/451] Add contains method to associative containers. This
 patch implements P0458R2, adding contains to map, multimap, unordered_map,
 unordered_multimap, set, multiset, unordered_set, and unordered_multiset.

llvm-svn: 366170
---
 libcxx/include/map                            | 16 ++++-
 libcxx/include/set                            | 19 +++++-
 libcxx/include/unordered_map                  | 10 +++
 libcxx/include/unordered_set                  | 10 +++
 .../associative/map/contains.pass.cpp         | 62 +++++++++++++++++++
 .../associative/set/contains.pass.cpp         | 44 +++++++++++++
 .../unord/unord.map/contains.pass.cpp         | 62 +++++++++++++++++++
 .../unord/unord.set/contains.pass.cpp         | 44 +++++++++++++
 libcxx/www/cxx2a_status.html                  |  2 +-
 9 files changed, 263 insertions(+), 6 deletions(-)
 create mode 100644 libcxx/test/std/containers/associative/map/contains.pass.cpp
 create mode 100644 libcxx/test/std/containers/associative/set/contains.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/contains.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.set/contains.pass.cpp

diff --git a/libcxx/include/map b/libcxx/include/map
index 6805a513394a3..eb6ae57b011f8 100644
--- a/libcxx/include/map
+++ b/libcxx/include/map
@@ -193,8 +193,8 @@ public:
         const_iterator find(const K& x) const;  // C++14
     template<typename K>
       size_type count(const K& x) const;        // C++14
-
     size_type      count(const key_type& k) const;
+        bool contains(const key_type& x) const; // C++20
           iterator lower_bound(const key_type& k);
     const_iterator lower_bound(const key_type& k) const;
     template<typename K>
@@ -407,8 +407,8 @@ public:
         const_iterator find(const K& x) const;  // C++14
     template<typename K>
       size_type count(const K& x) const;        // C++14
-
     size_type      count(const key_type& k) const;
+        bool contains(const key_type& x) const; // C++20
           iterator lower_bound(const key_type& k);
     const_iterator lower_bound(const key_type& k) const;
     template<typename K>
@@ -1398,6 +1398,12 @@ public:
     typename enable_if<__is_transparent<_Compare, _K2>::value,size_type>::type
     count(const _K2& __k) const {return __tree_.__count_multi(__k);}
 #endif
+
+#if _LIBCPP_STD_VER > 17
+    _LIBCPP_INLINE_VISIBILITY
+    bool contains(const key_type& __k) const {return find(__k) != end();}
+#endif // _LIBCPP_STD_VER > 17
+
     _LIBCPP_INLINE_VISIBILITY
     iterator lower_bound(const key_type& __k)
         {return __tree_.lower_bound(__k);}
@@ -2055,6 +2061,12 @@ public:
     typename enable_if<__is_transparent<_Compare, _K2>::value,size_type>::type
     count(const _K2& __k) const {return __tree_.__count_multi(__k);}
 #endif
+
+#if _LIBCPP_STD_VER > 17
+    _LIBCPP_INLINE_VISIBILITY
+    bool contains(const key_type& __k) const {return find(__k) != end();}
+#endif // _LIBCPP_STD_VER > 17
+
     _LIBCPP_INLINE_VISIBILITY
     iterator lower_bound(const key_type& __k)
         {return __tree_.lower_bound(__k);}
diff --git a/libcxx/include/set b/libcxx/include/set
index 79e8f29f0b943..70ab4d37add20 100644
--- a/libcxx/include/set
+++ b/libcxx/include/set
@@ -155,9 +155,9 @@ public:
     template<typename K>
         const_iterator find(const K& x) const;  // C++14
     template<typename K>
-      size_type count(const K& x) const;        // C++14
-
+        size_type count(const K& x) const;        // C++14
     size_type      count(const key_type& k) const;
+        bool contains(const key_type& x) const; // C++20
           iterator lower_bound(const key_type& k);
     const_iterator lower_bound(const key_type& k) const;
     template<typename K>
@@ -354,8 +354,10 @@ public:
         iterator find(const K& x);
     template<typename K>
         const_iterator find(const K& x) const;  // C++14
-
+    template<typename K>
+        size_type count(const K& x) const;      // C++14
     size_type      count(const key_type& k) const;
+        bool contains(const key_type& x) const; // C++20
           iterator lower_bound(const key_type& k);
     const_iterator lower_bound(const key_type& k) const;
     template<typename K>
@@ -787,6 +789,12 @@ public:
     typename enable_if<__is_transparent<_Compare, _K2>::value,size_type>::type
     count(const _K2& __k) const                    {return __tree_.__count_multi(__k);}
 #endif
+
+#if _LIBCPP_STD_VER > 17
+    _LIBCPP_INLINE_VISIBILITY
+    bool contains(const key_type& __k) const {return find(__k) != end();}
+#endif // _LIBCPP_STD_VER > 17
+
     _LIBCPP_INLINE_VISIBILITY
     iterator lower_bound(const key_type& __k)
         {return __tree_.lower_bound(__k);}
@@ -1307,6 +1315,11 @@ public:
     count(const _K2& __k) const            {return __tree_.__count_multi(__k);}
 #endif
 
+#if _LIBCPP_STD_VER > 17
+    _LIBCPP_INLINE_VISIBILITY
+    bool contains(const key_type& __k) const {return find(__k) != end();}
+#endif // _LIBCPP_STD_VER > 17
+
     _LIBCPP_INLINE_VISIBILITY
     iterator lower_bound(const key_type& __k)
         {return __tree_.lower_bound(__k);}
diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index 63aecc8bc0ef5..ad17f776c9388 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -174,6 +174,7 @@ public:
     iterator       find(const key_type& k);
     const_iterator find(const key_type& k) const;
     size_type count(const key_type& k) const;
+    bool contains(const key_type& k) const; // C++20
     pair<iterator, iterator>             equal_range(const key_type& k);
     pair<const_iterator, const_iterator> equal_range(const key_type& k) const;
 
@@ -355,6 +356,7 @@ public:
     iterator       find(const key_type& k);
     const_iterator find(const key_type& k) const;
     size_type count(const key_type& k) const;
+    bool contains(const key_type& k) const; // C++20
     pair<iterator, iterator>             equal_range(const key_type& k);
     pair<const_iterator, const_iterator> equal_range(const key_type& k) const;
 
@@ -1278,6 +1280,10 @@ public:
     const_iterator find(const key_type& __k) const {return __table_.find(__k);}
     _LIBCPP_INLINE_VISIBILITY
     size_type count(const key_type& __k) const {return __table_.__count_unique(__k);}
+    #if _LIBCPP_STD_VER > 17
+        _LIBCPP_INLINE_VISIBILITY
+        bool contains(const key_type& __k) const {return find(__k) != end();}
+    #endif // _LIBCPP_STD_VER > 17
     _LIBCPP_INLINE_VISIBILITY
     pair<iterator, iterator>             equal_range(const key_type& __k)
         {return __table_.__equal_range_unique(__k);}
@@ -2049,6 +2055,10 @@ public:
     const_iterator find(const key_type& __k) const {return __table_.find(__k);}
     _LIBCPP_INLINE_VISIBILITY
     size_type count(const key_type& __k) const {return __table_.__count_multi(__k);}
+    #if _LIBCPP_STD_VER > 17
+        _LIBCPP_INLINE_VISIBILITY
+        bool contains(const key_type& __k) const {return find(__k) != end();}
+    #endif // _LIBCPP_STD_VER > 17
     _LIBCPP_INLINE_VISIBILITY
     pair<iterator, iterator>             equal_range(const key_type& __k)
         {return __table_.__equal_range_multi(__k);}
diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set
index 4a9f030932798..68f777a4ea3eb 100644
--- a/libcxx/include/unordered_set
+++ b/libcxx/include/unordered_set
@@ -146,6 +146,7 @@ public:
     iterator       find(const key_type& k);
     const_iterator find(const key_type& k) const;
     size_type count(const key_type& k) const;
+    bool contains(const key_type& k) const; // C++20
     pair<iterator, iterator>             equal_range(const key_type& k);
     pair<const_iterator, const_iterator> equal_range(const key_type& k) const;
 
@@ -310,6 +311,7 @@ public:
     iterator       find(const key_type& k);
     const_iterator find(const key_type& k) const;
     size_type count(const key_type& k) const;
+    bool contains(const key_type& k) const; // C++20
     pair<iterator, iterator>             equal_range(const key_type& k);
     pair<const_iterator, const_iterator> equal_range(const key_type& k) const;
 
@@ -677,6 +679,10 @@ public:
     const_iterator find(const key_type& __k) const {return __table_.find(__k);}
     _LIBCPP_INLINE_VISIBILITY
     size_type count(const key_type& __k) const {return __table_.__count_unique(__k);}
+    #if _LIBCPP_STD_VER > 17
+        _LIBCPP_INLINE_VISIBILITY
+        bool contains(const key_type& __k) const {return find(__k) != end();}
+    #endif // _LIBCPP_STD_VER > 17
     _LIBCPP_INLINE_VISIBILITY
     pair<iterator, iterator>             equal_range(const key_type& __k)
         {return __table_.__equal_range_unique(__k);}
@@ -1304,6 +1310,10 @@ public:
     const_iterator find(const key_type& __k) const {return __table_.find(__k);}
     _LIBCPP_INLINE_VISIBILITY
     size_type count(const key_type& __k) const {return __table_.__count_multi(__k);}
+    #if _LIBCPP_STD_VER > 17
+        _LIBCPP_INLINE_VISIBILITY
+        bool contains(const key_type& __k) const {return find(__k) != end();}
+    #endif // _LIBCPP_STD_VER > 17
     _LIBCPP_INLINE_VISIBILITY
     pair<iterator, iterator>             equal_range(const key_type& __k)
         {return __table_.__equal_range_multi(__k);}
diff --git a/libcxx/test/std/containers/associative/map/contains.pass.cpp b/libcxx/test/std/containers/associative/map/contains.pass.cpp
new file mode 100644
index 0000000000000..5b71eedba4d82
--- /dev/null
+++ b/libcxx/test/std/containers/associative/map/contains.pass.cpp
@@ -0,0 +1,62 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
+
+#include <cassert>
+#include <map>
+
+// <map>
+
+// bool contains(const key_type& x) const;
+
+template <typename T, typename P, typename B, typename... Pairs>
+void test(B bad, Pairs... args) {
+    T map;
+    P pairs[] = {args...};
+
+    for (auto& p : pairs) map.insert(p);
+    for (auto& p : pairs) assert(map.contains(p.first));
+
+    assert(!map.contains(bad));
+}
+
+struct E { int a = 1; double b = 1; char c = 1; };
+
+int main(int, char**)
+{
+    {
+        test<std::map<char, int>, std::pair<char, int> >(
+            'e', std::make_pair('a', 10), std::make_pair('b', 11),
+            std::make_pair('c', 12), std::make_pair('d', 13));
+
+        test<std::map<char, char>, std::pair<char, char> >(
+            'e', std::make_pair('a', 'a'), std::make_pair('b', 'a'),
+            std::make_pair('c', 'a'), std::make_pair('d', 'b'));
+
+        test<std::map<int, E>, std::pair<int, E> >(
+            -1, std::make_pair(1, E{}), std::make_pair(2, E{}),
+            std::make_pair(3, E{}), std::make_pair(4, E{}));
+    }
+    {
+        test<std::multimap<char, int>, std::pair<char, int> >(
+            'e', std::make_pair('a', 10), std::make_pair('b', 11),
+            std::make_pair('c', 12), std::make_pair('d', 13));
+
+        test<std::multimap<char, char>, std::pair<char, char> >(
+            'e', std::make_pair('a', 'a'), std::make_pair('b', 'a'),
+            std::make_pair('c', 'a'), std::make_pair('d', 'b'));
+
+        test<std::multimap<int, E>, std::pair<int, E> >(
+            -1, std::make_pair(1, E{}), std::make_pair(2, E{}),
+            std::make_pair(3, E{}), std::make_pair(4, E{}));
+    }
+
+    return 0;
+}
+
diff --git a/libcxx/test/std/containers/associative/set/contains.pass.cpp b/libcxx/test/std/containers/associative/set/contains.pass.cpp
new file mode 100644
index 0000000000000..2b09729048723
--- /dev/null
+++ b/libcxx/test/std/containers/associative/set/contains.pass.cpp
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
+
+#include <cassert>
+#include <set>
+
+// <set>
+
+// bool contains(const key_type& x) const;
+
+template <typename T, typename V, typename B, typename... Vals>
+void test(B bad, Vals... args) {
+    T set;
+    V vals[] = {args...};
+
+    for (auto& v : vals) set.insert(v);
+    for (auto& v : vals) assert(set.contains(v));
+
+    assert(!set.contains(bad));
+}
+
+struct E { int a = 1; double b = 1; char c = 1; };
+
+int main(int, char**)
+{
+    {
+        test<std::set<int>, int>(14, 10, 11, 12, 13);
+        test<std::set<char>, char>('e', 'a', 'b', 'c', 'd');
+    }
+    {
+        test<std::multiset<int>, int>(14, 10, 11, 12, 13);
+        test<std::multiset<char>, char>('e', 'a', 'b', 'c', 'd');
+    }
+
+    return 0;
+}
+
diff --git a/libcxx/test/std/containers/unord/unord.map/contains.pass.cpp b/libcxx/test/std/containers/unord/unord.map/contains.pass.cpp
new file mode 100644
index 0000000000000..c591e197249f4
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/contains.pass.cpp
@@ -0,0 +1,62 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
+
+#include <cassert>
+#include <unordered_map>
+
+// <unordered_map>
+
+// bool contains(const key_type& x) const;
+
+template <typename T, typename P, typename B, typename... Pairs>
+void test(B bad, Pairs... args) {
+    T map;
+    P pairs[] = {args...};
+
+    for (auto& p : pairs) map.insert(p);
+    for (auto& p : pairs) assert(map.contains(p.first));
+
+    assert(!map.contains(bad));
+}
+
+struct E { int a = 1; double b = 1; char c = 1; };
+
+int main(int, char**)
+{
+    {
+        test<std::unordered_map<char, int>, std::pair<char, int> >(
+            'e', std::make_pair('a', 10), std::make_pair('b', 11),
+            std::make_pair('c', 12), std::make_pair('d', 13));
+
+        test<std::unordered_map<char, char>, std::pair<char, char> >(
+            'e', std::make_pair('a', 'a'), std::make_pair('b', 'a'),
+            std::make_pair('c', 'a'), std::make_pair('d', 'b'));
+
+        test<std::unordered_map<int, E>, std::pair<int, E> >(
+            -1, std::make_pair(1, E{}), std::make_pair(2, E{}),
+            std::make_pair(3, E{}), std::make_pair(4, E{}));
+    }
+    {
+        test<std::unordered_multimap<char, int>, std::pair<char, int> >(
+            'e', std::make_pair('a', 10), std::make_pair('b', 11),
+            std::make_pair('c', 12), std::make_pair('d', 13));
+
+        test<std::unordered_multimap<char, char>, std::pair<char, char> >(
+            'e', std::make_pair('a', 'a'), std::make_pair('b', 'a'),
+            std::make_pair('c', 'a'), std::make_pair('d', 'b'));
+
+        test<std::unordered_multimap<int, E>, std::pair<int, E> >(
+            -1, std::make_pair(1, E{}), std::make_pair(2, E{}),
+            std::make_pair(3, E{}), std::make_pair(4, E{}));
+    }
+
+    return 0;
+}
+
diff --git a/libcxx/test/std/containers/unord/unord.set/contains.pass.cpp b/libcxx/test/std/containers/unord/unord.set/contains.pass.cpp
new file mode 100644
index 0000000000000..3b87f2fd0e04d
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.set/contains.pass.cpp
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17
+
+#include <cassert>
+#include <unordered_set>
+
+// <unordered_set>
+
+// bool contains(const key_type& x) const;
+
+template <typename T, typename V, typename B, typename... Vals>
+void test(B bad, Vals... args) {
+    T set;
+    V vals[] = {args...};
+
+    for (auto& v : vals) set.insert(v);
+    for (auto& v : vals) assert(set.contains(v));
+
+    assert(!set.contains(bad));
+}
+
+struct E { int a = 1; double b = 1; char c = 1; };
+
+int main(int, char**)
+{
+    {
+        test<std::unordered_set<int>, int>(14, 10, 11, 12, 13);
+        test<std::unordered_set<char>, char>('e', 'a', 'b', 'c', 'd');
+    }
+    {
+        test<std::unordered_multiset<int>, int>(14, 10, 11, 12, 13);
+        test<std::unordered_multiset<char>, char>('e', 'a', 'b', 'c', 'd');
+    }
+
+    return 0;
+}
+
diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html
index 2b19eb86ba332..9489c07ef0bbe 100644
--- a/libcxx/www/cxx2a_status.html
+++ b/libcxx/www/cxx2a_status.html
@@ -83,7 +83,7 @@ <h3>Paper Status</h3>
 
   	<tr><td></td><td></td><td></td><td></td><td></td><td></td></tr>
 	<tr><td><a href="https://wg21.link/P0019R8">P0019R8</a></td><td>LWG</td><td>Atomic Ref</td><td>Rapperswil</td><td></td><td></td></tr>
-	<tr><td><a href="https://wg21.link/P0458R2">P0458R2</a></td><td>LWG</td><td>Checking for Existence of an Element in Associative Containers</td><td>Rapperswil</td><td></td><td></td></tr>
+	<tr><td><a href="https://wg21.link/P0458R2">P0458R2</a></td><td>LWG</td><td>Checking for Existence of an Element in Associative Containers</td><td>Rapperswil</td><td>Complete</td><td></td></tr>
 	<tr><td><a href="https://wg21.link/P0475R1">P0475R1</a></td><td>LWG</td><td>LWG 2511: guaranteed copy elision for piecewise construction</td><td>Rapperswil</td><td></td><td></td></tr>
 	<tr><td><a href="https://wg21.link/P0476R2">P0476R2</a></td><td>LWG</td><td>Bit-casting object representations</td><td>Rapperswil</td><td></td><td></td></tr>
 	<tr><td><a href="https://wg21.link/P0528R3">P0528R3</a></td><td>CWG</td><td>The Curious Case of Padding Bits, Featuring Atomic Compare-and-Exchange</td><td>Rapperswil</td><td></td><td></td></tr>

From e5c4b468f06307bc1b8341af9ccf9dd69fa890f4 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Tue, 16 Jul 2019 03:25:50 +0000
Subject: [PATCH 202/451] hwasan: Pad arrays with non-1 size correctly.

Spotted by eugenis.

Differential Revision: https://reviews.llvm.org/D64783

llvm-svn: 366171
---
 .../Instrumentation/HWAddressSanitizer.cpp        | 14 +++++++++-----
 .../HWAddressSanitizer/alloca-array.ll            | 15 +++++++++++++++
 .../HWAddressSanitizer/alloca-with-calls.ll       |  4 ++--
 .../Instrumentation/HWAddressSanitizer/alloca.ll  | 10 +++++-----
 .../HWAddressSanitizer/kernel-alloca.ll           |  4 ++--
 5 files changed, 33 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll

diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index a961c81358792..450ae2f79026e 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1108,8 +1108,14 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
     uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment());
     AI->setAlignment(std::max(AI->getAlignment(), 16u));
     if (Size != AlignedSize) {
+      Type *AllocatedType = AI->getAllocatedType();
+      if (AI->isArrayAllocation()) {
+        uint64_t ArraySize =
+            cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+        AllocatedType = ArrayType::get(AllocatedType, ArraySize);
+      }
       Type *TypeWithPadding = StructType::get(
-          AI->getAllocatedType(), ArrayType::get(Int8Ty, AlignedSize - Size));
+          AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
       auto *NewAI = new AllocaInst(
           TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
       NewAI->takeName(AI);
@@ -1117,10 +1123,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
       NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
       NewAI->setSwiftError(AI->isSwiftError());
       NewAI->copyMetadata(*AI);
-      Value *Zero = ConstantInt::get(Int32Ty, 0);
-      auto *GEP = GetElementPtrInst::Create(TypeWithPadding, NewAI,
-                                            {Zero, Zero}, "", AI);
-      AI->replaceAllUsesWith(GEP);
+      auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
+      AI->replaceAllUsesWith(Bitcast);
       AllocaToPaddedAllocaMap[AI] = NewAI;
     }
   }
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll
new file mode 100644
index 0000000000000..7a83d0e2bb026
--- /dev/null
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -hwasan -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android"
+
+declare void @use(i8*, i8*)
+
+define void @test_alloca() sanitize_hwaddress { ; two i8 array allocas: one of 4 elements, one of 16
+  ; CHECK: alloca { [4 x i8], [12 x i8] }, align 16
+  %x = alloca i8, i64 4 ; 4 bytes: the expectation above is a [4 x i8] followed by 12 bytes of padding
+  ; CHECK: alloca i8, i64 16, align 16
+  %y = alloca i8, i64 16 ; 16 bytes: the expectation above keeps the array alloca and only adds the alignment
+  call void @use(i8* %x, i8* %y) ; both allocas are passed to the external @use
+  ret void
+}
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll
index d47c38ff58902..2d0d113dedff9 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll
@@ -9,10 +9,10 @@ declare void @use32(i32*)
 
 define void @test_alloca() sanitize_hwaddress {
 ; CHECK-LABEL: @test_alloca(
-; CHECK: %[[GEP:[^ ]*]] = getelementptr { i32, [12 x i8] }, { i32, [12 x i8] }* %x, i32 0, i32 0
+; CHECK: %[[BC:[^ ]*]] = bitcast { i32, [12 x i8] }* %x to i32*
 ; CHECK: %[[T1:[^ ]*]] = call i8 @__hwasan_generate_tag()
 ; CHECK: %[[A:[^ ]*]] = zext i8 %[[T1]] to i64
-; CHECK: %[[B:[^ ]*]] = ptrtoint i32* %[[GEP]] to i64
+; CHECK: %[[B:[^ ]*]] = ptrtoint i32* %[[BC]] to i64
 ; CHECK: %[[C:[^ ]*]] = shl i64 %[[A]], 56
 ; CHECK: or i64 %[[B]], %[[C]]
 
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll
index 65272caf6f4b8..ea2b566a744a5 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll
@@ -17,28 +17,28 @@ define void @test_alloca() sanitize_hwaddress {
 ; CHECK: %[[BASE_TAG:[^ ]*]] = xor i64 %[[A]], %[[B]]
 
 ; CHECK: %[[X:[^ ]*]] = alloca { i32, [12 x i8] }, align 16
-; CHECK: %[[X_GEP:[^ ]*]] = getelementptr { i32, [12 x i8] }, { i32, [12 x i8] }* %[[X]], i32 0, i32 0
+; CHECK: %[[X_BC:[^ ]*]] = bitcast { i32, [12 x i8] }* %[[X]] to i32*
 ; CHECK: %[[X_TAG:[^ ]*]] = xor i64 %[[BASE_TAG]], 0
-; CHECK: %[[X1:[^ ]*]] = ptrtoint i32* %[[X_GEP]] to i64
+; CHECK: %[[X1:[^ ]*]] = ptrtoint i32* %[[X_BC]] to i64
 ; CHECK: %[[C:[^ ]*]] = shl i64 %[[X_TAG]], 56
 ; CHECK: %[[D:[^ ]*]] = or i64 %[[X1]], %[[C]]
 ; CHECK: %[[X_HWASAN:[^ ]*]] = inttoptr i64 %[[D]] to i32*
 
 ; CHECK: %[[X_TAG2:[^ ]*]] = trunc i64 %[[X_TAG]] to i8
-; CHECK: %[[E:[^ ]*]] = ptrtoint i32* %[[X_GEP]] to i64
+; CHECK: %[[E:[^ ]*]] = ptrtoint i32* %[[X_BC]] to i64
 ; CHECK: %[[F:[^ ]*]] = lshr i64 %[[E]], 4
 ; DYNAMIC-SHADOW: %[[X_SHADOW:[^ ]*]] = getelementptr i8, i8* %.hwasan.shadow, i64 %[[F]]
 ; ZERO-BASED-SHADOW: %[[X_SHADOW:[^ ]*]] = inttoptr i64 %[[F]] to i8*
 ; CHECK: %[[X_SHADOW_GEP:[^ ]*]] = getelementptr i8, i8* %[[X_SHADOW]], i32 0
 ; CHECK: store i8 4, i8* %[[X_SHADOW_GEP]]
-; CHECK: %[[X_I8:[^ ]*]] = bitcast i32* %[[X_GEP]] to i8*
+; CHECK: %[[X_I8:[^ ]*]] = bitcast i32* %[[X_BC]] to i8*
 ; CHECK: %[[X_I8_GEP:[^ ]*]] = getelementptr i8, i8* %[[X_I8]], i32 15
 ; CHECK: store i8 %[[X_TAG2]], i8* %[[X_I8_GEP]]
 ; CHECK: call void @use32(i32* nonnull %[[X_HWASAN]])
 
 ; UAR-TAGS: %[[BASE_TAG_COMPL:[^ ]*]] = xor i64 %[[BASE_TAG]], 255
 ; UAR-TAGS: %[[X_TAG_UAR:[^ ]*]] = trunc i64 %[[BASE_TAG_COMPL]] to i8
-; CHECK: %[[E2:[^ ]*]] = ptrtoint i32* %[[X_GEP]] to i64
+; CHECK: %[[E2:[^ ]*]] = ptrtoint i32* %[[X_BC]] to i64
 ; CHECK: %[[F2:[^ ]*]] = lshr i64 %[[E2]], 4
 ; DYNAMIC-SHADOW: %[[X_SHADOW2:[^ ]*]] = getelementptr i8, i8* %.hwasan.shadow, i64 %[[F2]]
 ; ZERO-BASED-SHADOW: %[[X_SHADOW2:[^ ]*]] = inttoptr i64 %[[F2]] to i8*
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/kernel-alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/kernel-alloca.ll
index 8ab8725bcedc0..ddf81c482ccd9 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/kernel-alloca.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/kernel-alloca.ll
@@ -15,9 +15,9 @@ define void @test_alloca() sanitize_hwaddress {
 ; CHECK: %[[BASE_TAG:[^ ]*]] = xor i64 %[[A]], %[[B]]
 
 ; CHECK: %[[X:[^ ]*]] = alloca { i32, [12 x i8] }, align 16
-; CHECK: %[[X_GEP:[^ ]*]] = getelementptr { i32, [12 x i8] }, { i32, [12 x i8] }* %[[X]], i32 0, i32 0
+; CHECK: %[[X_BC:[^ ]*]] = bitcast { i32, [12 x i8] }* %[[X]] to i32*
 ; CHECK: %[[X_TAG:[^ ]*]] = xor i64 %[[BASE_TAG]], 0
-; CHECK: %[[X1:[^ ]*]] = ptrtoint i32* %[[X_GEP]] to i64
+; CHECK: %[[X1:[^ ]*]] = ptrtoint i32* %[[X_BC]] to i64
 ; CHECK: %[[C:[^ ]*]] = shl i64 %[[X_TAG]], 56
 ; CHECK: %[[D:[^ ]*]] = or i64 %[[C]], 72057594037927935
 ; CHECK: %[[E:[^ ]*]] = and i64 %[[X1]], %[[D]]

From 4ac0b9be230596e24e439109f2d23ea3dd81ebfd Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Tue, 16 Jul 2019 03:47:34 +0000
Subject: [PATCH 203/451] [RISCV] Make RISCVELFObjectWriter::getRelocType check
 IsPCRel

Previously, this function didn't check the IsPCRel argument. But doing so is a
useful check for errors, and also seemingly necessary for FK_Data_4 (which we
produce a R_RISCV_32_PCREL relocation for if IsPCRel).

Other than R_RISCV_32_PCREL, this should be NFC. Future exception handling
related patches will include tests that capture this behaviour.

llvm-svn: 366172
---
 .../MCTargetDesc/RISCVELFObjectWriter.cpp     | 61 +++++++++++--------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index c910f2ca39fec..3ccbc86d2619a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -48,7 +48,42 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
                                             const MCFixup &Fixup,
                                             bool IsPCRel) const {
   // Determine the type of the relocation
-  switch ((unsigned)Fixup.getKind()) {
+  unsigned Kind = Fixup.getKind();
+  if (IsPCRel) {
+    switch (Kind) {
+    default:
+      llvm_unreachable("invalid fixup kind!");
+    case FK_Data_4:
+    case FK_PCRel_4:
+      return ELF::R_RISCV_32_PCREL;
+    case RISCV::fixup_riscv_pcrel_hi20:
+      return ELF::R_RISCV_PCREL_HI20;
+    case RISCV::fixup_riscv_pcrel_lo12_i:
+      return ELF::R_RISCV_PCREL_LO12_I;
+    case RISCV::fixup_riscv_pcrel_lo12_s:
+      return ELF::R_RISCV_PCREL_LO12_S;
+    case RISCV::fixup_riscv_got_hi20:
+      return ELF::R_RISCV_GOT_HI20;
+    case RISCV::fixup_riscv_tls_got_hi20:
+      return ELF::R_RISCV_TLS_GOT_HI20;
+    case RISCV::fixup_riscv_tls_gd_hi20:
+      return ELF::R_RISCV_TLS_GD_HI20;
+    case RISCV::fixup_riscv_jal:
+      return ELF::R_RISCV_JAL;
+    case RISCV::fixup_riscv_branch:
+      return ELF::R_RISCV_BRANCH;
+    case RISCV::fixup_riscv_rvc_jump:
+      return ELF::R_RISCV_RVC_JUMP;
+    case RISCV::fixup_riscv_rvc_branch:
+      return ELF::R_RISCV_RVC_BRANCH;
+    case RISCV::fixup_riscv_call:
+      return ELF::R_RISCV_CALL;
+    case RISCV::fixup_riscv_call_plt:
+      return ELF::R_RISCV_CALL_PLT;
+    }
+  }
+
+  switch (Kind) {
   default:
     llvm_unreachable("invalid fixup kind!");
   case FK_Data_4:
@@ -77,14 +112,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
     return ELF::R_RISCV_LO12_I;
   case RISCV::fixup_riscv_lo12_s:
     return ELF::R_RISCV_LO12_S;
-  case RISCV::fixup_riscv_pcrel_hi20:
-    return ELF::R_RISCV_PCREL_HI20;
-  case RISCV::fixup_riscv_pcrel_lo12_i:
-    return ELF::R_RISCV_PCREL_LO12_I;
-  case RISCV::fixup_riscv_pcrel_lo12_s:
-    return ELF::R_RISCV_PCREL_LO12_S;
-  case RISCV::fixup_riscv_got_hi20:
-    return ELF::R_RISCV_GOT_HI20;
   case RISCV::fixup_riscv_tprel_hi20:
     return ELF::R_RISCV_TPREL_HI20;
   case RISCV::fixup_riscv_tprel_lo12_i:
@@ -93,22 +120,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
     return ELF::R_RISCV_TPREL_LO12_S;
   case RISCV::fixup_riscv_tprel_add:
     return ELF::R_RISCV_TPREL_ADD;
-  case RISCV::fixup_riscv_tls_got_hi20:
-    return ELF::R_RISCV_TLS_GOT_HI20;
-  case RISCV::fixup_riscv_tls_gd_hi20:
-    return ELF::R_RISCV_TLS_GD_HI20;
-  case RISCV::fixup_riscv_jal:
-    return ELF::R_RISCV_JAL;
-  case RISCV::fixup_riscv_branch:
-    return ELF::R_RISCV_BRANCH;
-  case RISCV::fixup_riscv_rvc_jump:
-    return ELF::R_RISCV_RVC_JUMP;
-  case RISCV::fixup_riscv_rvc_branch:
-    return ELF::R_RISCV_RVC_BRANCH;
-  case RISCV::fixup_riscv_call:
-    return ELF::R_RISCV_CALL;
-  case RISCV::fixup_riscv_call_plt:
-    return ELF::R_RISCV_CALL_PLT;
   case RISCV::fixup_riscv_relax:
     return ELF::R_RISCV_RELAX;
   case RISCV::fixup_riscv_align:

From a3c7b27419a52d612fe2cad319aafba1e226502b Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Tue, 16 Jul 2019 03:54:08 +0000
Subject: [PATCH 204/451] [RISCV][NFC] Fix HasStedExtA -> HasStdExtA typo in
 comment

Differential Revision: https://reviews.llvm.org/D64011
Patch by James Clarke.

llvm-svn: 366173
---
 llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index b35c2f128bae0..b768c9347b385 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -84,7 +84,7 @@ defm AMOMIN_D   : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">;
 defm AMOMAX_D   : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">;
 defm AMOMINU_D  : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">;
 defm AMOMAXU_D  : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">;
-} // Predicates = [HasStedExtA, IsRV64]
+} // Predicates = [HasStdExtA, IsRV64]
 
 //===----------------------------------------------------------------------===//
 // Pseudo-instructions and codegen patterns

From ef8577ef98717c1c6a66293de3b2cc0f09e8c3ff Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Tue, 16 Jul 2019 03:56:45 +0000
Subject: [PATCH 205/451] [RISCV][NFC] Split PseudoCALL pattern out from
 instruction

Since PseudoCALL defines AsmString, it can be generated from assembly,
and so code-gen patterns should be defined separately to be consistent
with the style of the RISCV backend. Other pseudo-instructions exist
that have code-gen patterns defined directly, but these instructions are
purely for code-gen and cannot be written in assembly.

Differential Revision: https://reviews.llvm.org/D64012
Patch by James Clarke.

llvm-svn: 366174
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 6c5218ba78359..b017307b46173 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -886,11 +886,11 @@ def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> {
 // Define AsmString to print "call" when compile with -S flag.
 // Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
 let isCall = 1, Defs = [X1], isCodeGenOnly = 0 in
-def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func),
-                        [(riscv_call tglobaladdr:$func)]> {
+def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
   let AsmString = "call\t$func";
 }
 
+def : Pat<(riscv_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
 def : Pat<(riscv_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
 
 def : Pat<(riscv_uret_flag), (URET X0, X0)>;

From e9ad0cf6cf79cfa5f8ce99db0f7161e110850011 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Tue, 16 Jul 2019 04:37:19 +0000
Subject: [PATCH 206/451] [RISCV] Fix a potential issue in
 shouldInsertFixupForCodeAlign()

The bool result of shouldInsertExtraNopBytesForCodeAlign() is not checked but
the returned nop count is unconditionally read even though it could be
uninitialized.

Differential Revision: https://reviews.llvm.org/D63285
Patch by Edward Jones.

llvm-svn: 366175
---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index db25efb160f60..821ac2033c939 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -329,11 +329,10 @@ bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm,
   if (!STI.getFeatureBits()[RISCV::FeatureRelax])
     return false;
 
-  // Calculate total Nops we need to insert.
+  // Calculate total Nops we need to insert. If there are none to insert
+  // then simply return.
   unsigned Count;
-  shouldInsertExtraNopBytesForCodeAlign(AF, Count);
-  // No Nop need to insert, simply return.
-  if (Count == 0)
+  if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count) || (Count == 0))
     return false;
 
   MCContext &Ctx = Asm.getContext();

From bb479ca311958120279cf2c21da3a9d8d06ceb17 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Tue, 16 Jul 2019 04:40:25 +0000
Subject: [PATCH 207/451] [RISCV] Avoid overflow when determining number of
 nops for code align

RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign() assumed that the
align specified would be greater than or equal to the minimum nop length, but
that is not always the case - for example if a user specifies ".align 0" in
assembly.

Differential Revision: https://reviews.llvm.org/D63274
Patch by Edward Jones.

llvm-svn: 366176
---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 8 ++++++--
 llvm/test/MC/RISCV/align.s                             | 7 +++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 821ac2033c939..ee5f760ebcb0e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -313,8 +313,12 @@ bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
   bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC];
   unsigned MinNopLen = HasStdExtC ? 2 : 4;
 
-  Size = AF.getAlignment() - MinNopLen;
-  return true;
+  if (AF.getAlignment() <= MinNopLen) {
+    return false;
+  } else {
+    Size = AF.getAlignment() - MinNopLen;
+    return true;
+  }
 }
 
 // We need to insert R_RISCV_ALIGN relocation type to indicate the
diff --git a/llvm/test/MC/RISCV/align.s b/llvm/test/MC/RISCV/align.s
index e62af93155587..b4b3e6aa778ae 100644
--- a/llvm/test/MC/RISCV/align.s
+++ b/llvm/test/MC/RISCV/align.s
@@ -90,6 +90,13 @@ test:
 	ret
 # NORELAX-RELOC-NOT: R_RISCV
 # C-EXT-NORELAX-RELOC-NOT: R_RISCV
+# Code alignment of a byte size less than the size of a nop must be treated
+# as no alignment. This used to trigger a fatal error with relaxation enabled
+# as the calculation to emit the worst-case sequence of nops would overflow.
+	.p2align        1
+	add	a0, a0, a1
+	.p2align        0
+	add	a0, a0, a1
 # We only need to insert R_RISCV_ALIGN for code section
 # when the linker relaxation enabled.
         .data

From 49a3ad21d6034eb20f99f228dbebcc5f65a748d8 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Tue, 16 Jul 2019 04:46:31 +0000
Subject: [PATCH 208/451] Fix parameter name comments using clang-tidy. NFC.

This patch applies clang-tidy's bugprone-argument-comment tool
to LLVM, clang and lld source trees. Here is how I created this
patch:

$ git clone https://github.com/llvm/llvm-project.git
$ cd llvm-project
$ mkdir build
$ cd build
$ cmake -GNinja -DCMAKE_BUILD_TYPE=Debug \
    -DLLVM_ENABLE_PROJECTS='clang;lld;clang-tools-extra' \
    -DCMAKE_EXPORT_COMPILE_COMMANDS=On -DLLVM_ENABLE_LLD=On \
    -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ ../llvm
$ ninja
$ parallel clang-tidy -checks='-*,bugprone-argument-comment' \
    -config='{CheckOptions: [{key: StrictMode, value: 1}]}' -fix \
    ::: ../llvm/lib/**/*.{cpp,h} ../clang/lib/**/*.{cpp,h} ../lld/**/*.{cpp,h}

llvm-svn: 366177
---
 clang/lib/ARCMigrate/ARCMT.cpp                |  2 +-
 clang/lib/ARCMigrate/ObjCMT.cpp               |  4 +-
 clang/lib/ARCMigrate/TransGCAttrs.cpp         |  2 +-
 clang/lib/AST/Expr.cpp                        |  2 +-
 clang/lib/AST/ItaniumMangle.cpp               |  2 +-
 clang/lib/AST/Mangle.cpp                      |  2 +-
 clang/lib/AST/ScanfFormatString.cpp           |  2 +-
 clang/lib/AST/Type.cpp                        |  4 +-
 clang/lib/AST/VTableBuilder.cpp               |  4 +-
 clang/lib/Analysis/BodyFarm.cpp               | 24 +++++-----
 clang/lib/Analysis/CFG.cpp                    |  4 +-
 clang/lib/Basic/FixedPoint.cpp                |  4 +-
 clang/lib/CodeGen/CGBuilder.h                 |  2 +-
 clang/lib/CodeGen/CGBuiltin.cpp               | 12 ++---
 clang/lib/CodeGen/CGCXX.cpp                   |  2 +-
 clang/lib/CodeGen/CGCXXABI.cpp                |  2 +-
 clang/lib/CodeGen/CGCall.cpp                  |  6 +--
 clang/lib/CodeGen/CGCoroutine.cpp             |  2 +-
 clang/lib/CodeGen/CGDecl.cpp                  |  2 +-
 clang/lib/CodeGen/CGException.cpp             |  8 ++--
 clang/lib/CodeGen/CGExpr.cpp                  | 12 ++---
 clang/lib/CodeGen/CGExprCXX.cpp               |  2 +-
 clang/lib/CodeGen/CGExprConstant.cpp          |  2 +-
 clang/lib/CodeGen/CGObjC.cpp                  |  2 +-
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 26 +++++------
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  4 +-
 clang/lib/CodeGen/CodeGenABITypes.cpp         |  2 +-
 clang/lib/CodeGen/CodeGenModule.cpp           |  2 +-
 clang/lib/CodeGen/CoverageMappingGen.cpp      |  2 +-
 clang/lib/CodeGen/ItaniumCXXABI.cpp           | 22 +++++-----
 clang/lib/CodeGen/MicrosoftCXXABI.cpp         | 32 +++++++-------
 clang/lib/CodeGen/TargetInfo.cpp              |  2 +-
 clang/lib/Driver/Driver.cpp                   |  4 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |  8 ++--
 clang/lib/Driver/ToolChains/MSVC.cpp          |  4 +-
 clang/lib/Format/UnwrappedLineFormatter.cpp   |  2 +-
 clang/lib/Frontend/FrontendActions.cpp        |  4 +-
 .../Frontend/Rewrite/RewriteModernObjC.cpp    | 10 ++---
 clang/lib/Frontend/Rewrite/RewriteObjC.cpp    | 10 ++---
 clang/lib/Index/IndexDecl.cpp                 |  4 +-
 clang/lib/Lex/HeaderSearch.cpp                |  8 ++--
 clang/lib/Lex/PPDirectives.cpp                |  6 +--
 clang/lib/Lex/PPMacroExpansion.cpp            |  2 +-
 clang/lib/Lex/Pragma.cpp                      | 10 ++---
 clang/lib/Lex/Preprocessor.cpp                |  2 +-
 clang/lib/Parse/ParseCXXInlineMethods.cpp     |  2 +-
 clang/lib/Parse/ParseDecl.cpp                 |  2 +-
 clang/lib/Parse/ParseDeclCXX.cpp              |  4 +-
 clang/lib/Parse/ParseExpr.cpp                 |  8 ++--
 clang/lib/Parse/ParseExprCXX.cpp              | 16 +++----
 clang/lib/Parse/ParseObjc.cpp                 | 12 ++---
 clang/lib/Parse/ParsePragma.cpp               |  6 +--
 clang/lib/Parse/ParseTemplate.cpp             |  2 +-
 clang/lib/Sema/SemaCast.cpp                   |  6 +--
 clang/lib/Sema/SemaChecking.cpp               |  2 +-
 clang/lib/Sema/SemaCodeComplete.cpp           | 10 ++---
 clang/lib/Sema/SemaDecl.cpp                   |  2 +-
 clang/lib/Sema/SemaDeclAttr.cpp               |  6 +--
 clang/lib/Sema/SemaDeclCXX.cpp                |  8 ++--
 clang/lib/Sema/SemaDeclObjC.cpp               |  2 +-
 clang/lib/Sema/SemaExpr.cpp                   | 30 ++++++-------
 clang/lib/Sema/SemaExprCXX.cpp                |  2 +-
 clang/lib/Sema/SemaExprMember.cpp             | 14 +++---
 clang/lib/Sema/SemaInit.cpp                   |  6 +--
 clang/lib/Sema/SemaLambda.cpp                 | 10 ++---
 clang/lib/Sema/SemaModule.cpp                 |  4 +-
 clang/lib/Sema/SemaObjCProperty.cpp           |  2 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  4 +-
 clang/lib/Sema/SemaOverload.cpp               | 44 +++++++++----------
 clang/lib/Sema/SemaStmt.cpp                   |  4 +-
 clang/lib/Sema/SemaStmtAsm.cpp                |  2 +-
 clang/lib/Sema/SemaTemplate.cpp               |  6 +--
 clang/lib/Sema/SemaTemplateDeduction.cpp      |  6 +--
 clang/lib/Sema/SemaType.cpp                   |  2 +-
 clang/lib/Serialization/ASTReader.cpp         |  2 +-
 clang/lib/Serialization/ASTReaderStmt.cpp     |  2 +-
 clang/lib/Serialization/ASTWriter.cpp         |  2 +-
 clang/lib/Serialization/GlobalModuleIndex.cpp |  4 +-
 clang/lib/Serialization/ModuleManager.cpp     | 12 ++---
 .../Checkers/DynamicTypePropagation.cpp       |  6 +--
 .../Checkers/GCDAntipatternChecker.cpp        |  2 +-
 .../Checkers/IdenticalExprChecker.cpp         |  2 +-
 .../StaticAnalyzer/Checkers/MallocChecker.cpp |  2 +-
 .../Checkers/NullabilityChecker.cpp           |  2 +-
 .../Checkers/OSObjectCStyleCast.cpp           |  2 +-
 .../Checkers/ObjCAutoreleaseWriteChecker.cpp  |  2 +-
 .../RetainCountChecker/RetainCountChecker.cpp |  2 +-
 .../RetainCountDiagnostics.cpp                |  2 +-
 .../RunLoopAutoreleaseLeakChecker.cpp         |  2 +-
 .../Checkers/TrustNonnullChecker.cpp          |  4 +-
 .../StaticAnalyzer/Core/AnalysisManager.cpp   |  2 +-
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp |  2 +-
 .../StaticAnalyzer/Core/DynamicTypeMap.cpp    |  2 +-
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |  2 +-
 .../Core/ExprEngineCallAndReturn.cpp          | 12 ++---
 clang/lib/StaticAnalyzer/Core/RegionStore.cpp |  2 +-
 .../StaticAnalyzer/Core/SimpleSValBuilder.cpp |  2 +-
 lld/COFF/Driver.cpp                           |  4 +-
 lld/COFF/DriverUtils.cpp                      |  2 +-
 lld/ELF/Driver.cpp                            |  8 ++--
 lld/ELF/InputFiles.cpp                        |  6 +--
 lld/ELF/ScriptParser.cpp                      | 12 ++---
 lld/ELF/Thunks.cpp                            |  2 +-
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  4 +-
 llvm/lib/CodeGen/EdgeBundles.cpp              |  2 +-
 llvm/lib/CodeGen/MachineBasicBlock.cpp        |  2 +-
 llvm/lib/CodeGen/MachineBlockPlacement.cpp    |  2 +-
 llvm/lib/CodeGen/MachineFrameInfo.cpp         |  2 +-
 llvm/lib/CodeGen/MachineFunction.cpp          |  2 +-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp    | 30 ++++++-------
 .../SelectionDAG/FunctionLoweringInfo.cpp     |  2 +-
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  2 +-
 llvm/lib/CodeGen/WinEHPrepare.cpp             |  4 +-
 llvm/lib/IR/LegacyPassManager.cpp             |  2 +-
 llvm/lib/Support/APSInt.cpp                   |  6 +--
 llvm/lib/Support/LowLevelType.cpp             |  4 +-
 llvm/lib/Support/raw_ostream.cpp              |  2 +-
 llvm/lib/Target/AArch64/AArch64FastISel.cpp   |  6 +--
 .../AMDGPUOpenCLEnqueuedBlockLowering.cpp     |  4 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  6 +--
 .../MCTargetDesc/LanaiELFObjectWriter.cpp     |  2 +-
 .../MCTargetDesc/SystemZMCObjectWriter.cpp    |  4 +-
 .../MCTargetDesc/WebAssemblyInstPrinter.cpp   |  2 +-
 .../WebAssembly/WebAssemblyFastISel.cpp       |  2 +-
 .../WebAssembly/WebAssemblyRegisterInfo.cpp   |  6 +--
 llvm/lib/Target/X86/X86FastISel.cpp           |  2 +-
 llvm/lib/Target/X86/X86FrameLowering.cpp      |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  4 +-
 llvm/lib/Target/X86/X86WinAllocaExpander.cpp  |  2 +-
 llvm/lib/Transforms/Coroutines/CoroEarly.cpp  |  2 +-
 llvm/lib/Transforms/Coroutines/CoroFrame.cpp  |  2 +-
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp  |  2 +-
 .../InstCombine/InstCombineAndOrXor.cpp       |  2 +-
 .../InstCombine/InstCombineMulDivRem.cpp      |  8 ++--
 .../Instrumentation/HWAddressSanitizer.cpp    |  2 +-
 .../Scalar/CorrelatedValuePropagation.cpp     |  2 +-
 llvm/lib/Transforms/Scalar/Float2Int.cpp      |  2 +-
 .../Transforms/Scalar/LoopStrengthReduce.cpp  |  2 +-
 llvm/lib/Transforms/Utils/LowerSwitch.cpp     |  2 +-
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |  2 +-
 140 files changed, 372 insertions(+), 372 deletions(-)

diff --git a/clang/lib/ARCMigrate/ARCMT.cpp b/clang/lib/ARCMigrate/ARCMT.cpp
index 6a5efc00d7e85..568e06f21fba6 100644
--- a/clang/lib/ARCMigrate/ARCMT.cpp
+++ b/clang/lib/ARCMigrate/ARCMT.cpp
@@ -514,7 +514,7 @@ MigrationProcess::MigrationProcess(
     IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
       new DiagnosticsEngine(DiagID, &CI.getDiagnosticOpts(),
                             DiagClient, /*ShouldOwnClient=*/false));
-    Remapper.initFromDisk(outputDir, *Diags, /*ignoreIfFilesChanges=*/true);
+    Remapper.initFromDisk(outputDir, *Diags, /*ignoreIfFilesChanged=*/true);
   }
 }
 
diff --git a/clang/lib/ARCMigrate/ObjCMT.cpp b/clang/lib/ARCMigrate/ObjCMT.cpp
index f22e03f490f47..7126a0873ea0c 100644
--- a/clang/lib/ARCMigrate/ObjCMT.cpp
+++ b/clang/lib/ARCMigrate/ObjCMT.cpp
@@ -110,7 +110,7 @@ class ObjCMigrateASTConsumer : public ASTConsumer {
     if (!Summaries)
       Summaries.reset(new RetainSummaryManager(Ctx,
                                                /*TrackNSCFObjects=*/true,
-                                               /*TrackOSObjects=*/false));
+                                               /*trackOSObjects=*/false));
     return *Summaries;
   }
 
@@ -216,7 +216,7 @@ ObjCMigrateAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
 
 bool ObjCMigrateAction::BeginInvocation(CompilerInstance &CI) {
   Remapper.initFromDisk(MigrateDir, CI.getDiagnostics(),
-                        /*ignoreIfFilesChanges=*/true);
+                        /*ignoreIfFilesChanged=*/true);
   CompInst = &CI;
   CI.getDiagnostics().setIgnoreAllWarnings(true);
   return true;
diff --git a/clang/lib/ARCMigrate/TransGCAttrs.cpp b/clang/lib/ARCMigrate/TransGCAttrs.cpp
index fdbe1d119af81..5e3162197ed1b 100644
--- a/clang/lib/ARCMigrate/TransGCAttrs.cpp
+++ b/clang/lib/ARCMigrate/TransGCAttrs.cpp
@@ -269,7 +269,7 @@ static void checkAllAtProps(MigrationContext &MigrateCtx,
     StringRef toAttr = "strong";
     if (hasWeak) {
       if (canApplyWeak(MigrateCtx.Pass.Ctx, IndProps.front()->getType(),
-                       /*AllowOnUnkwownClass=*/true))
+                       /*AllowOnUnknownClass=*/true))
         toAttr = "weak";
       else
         toAttr = "unsafe_unretained";
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 10ab2bf72b72e..6ef77b8aee684 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2201,7 +2201,7 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
   case SourceLocExpr::Line:
   case SourceLocExpr::Column: {
     llvm::APSInt IntVal(Ctx.getIntWidth(Ctx.UnsignedIntTy),
-                        /*IsUnsigned=*/true);
+                        /*isUnsigned=*/true);
     IntVal = getIdentKind() == SourceLocExpr::Line ? PLoc.getLine()
                                                    : PLoc.getColumn();
     return APValue(IntVal);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 5f4644b0f7c31..6c813f09a4b3c 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3787,7 +3787,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
     if (TypeSourceInfo *ScopeInfo = PDE->getScopeTypeInfo()) {
       if (Qualifier) {
         mangleUnresolvedPrefix(Qualifier,
-                               /*Recursive=*/true);
+                               /*recursive=*/true);
         mangleUnresolvedTypeOrSimpleId(ScopeInfo->getType());
         Out << 'E';
       } else {
diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp
index 5582dc582aca2..625282368a4d1 100644
--- a/clang/lib/AST/Mangle.cpp
+++ b/clang/lib/AST/Mangle.cpp
@@ -379,7 +379,7 @@ class ASTNameGenerator::Implementation {
 
     auto hasDefaultCXXMethodCC = [](ASTContext &C, const CXXMethodDecl *MD) {
       auto DefaultCC = C.getDefaultCallingConvention(/*IsVariadic=*/false,
-                                                     /*IsCSSMethod=*/true);
+                                                     /*IsCXXMethod=*/true);
       auto CC = MD->getType()->getAs<FunctionProtoType>()->getCallConv();
       return CC == DefaultCC;
     };
diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp
index 1a87de70f86b7..8d763f28e57fd 100644
--- a/clang/lib/AST/ScanfFormatString.cpp
+++ b/clang/lib/AST/ScanfFormatString.cpp
@@ -142,7 +142,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
   }
 
   // Look for the length modifier.
-  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
+  if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
       // No more characters left?
     H.HandleIncompleteSpecifier(Start, E - Start);
     return true;
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 01e93c11aad87..ed75a0b5bcd85 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -4080,7 +4080,7 @@ CXXRecordDecl *MemberPointerType::getMostRecentCXXRecordDecl() const {
 void clang::FixedPointValueToString(SmallVectorImpl<char> &Str,
                                     llvm::APSInt Val, unsigned Scale) {
   FixedPointSemantics FXSema(Val.getBitWidth(), Scale, Val.isSigned(),
-                             /*isSaturated=*/false,
-                             /*hasUnsignedPadding=*/false);
+                             /*IsSaturated=*/false,
+                             /*HasUnsignedPadding=*/false);
   APFixedPoint(Val, FXSema).toString(Str);
 }
diff --git a/clang/lib/AST/VTableBuilder.cpp b/clang/lib/AST/VTableBuilder.cpp
index 5c79ea37ab11d..0c699571555d5 100644
--- a/clang/lib/AST/VTableBuilder.cpp
+++ b/clang/lib/AST/VTableBuilder.cpp
@@ -1272,7 +1272,7 @@ ThisAdjustment ItaniumVTableBuilder::ComputeThisAdjustment(
       // We don't have vcall offsets for this virtual base, go ahead and
       // build them.
       VCallAndVBaseOffsetBuilder Builder(MostDerivedClass, MostDerivedClass,
-                                         /*FinalOverriders=*/nullptr,
+                                         /*Overriders=*/nullptr,
                                          BaseSubobject(Offset.VirtualBase,
                                                        CharUnits::Zero()),
                                          /*BaseIsVirtual=*/true,
@@ -2245,7 +2245,7 @@ ItaniumVTableContext::getVirtualBaseOffsetOffset(const CXXRecordDecl *RD,
   if (I != VirtualBaseClassOffsetOffsets.end())
     return I->second;
 
-  VCallAndVBaseOffsetBuilder Builder(RD, RD, /*FinalOverriders=*/nullptr,
+  VCallAndVBaseOffsetBuilder Builder(RD, RD, /*Overriders=*/nullptr,
                                      BaseSubobject(RD, CharUnits::Zero()),
                                      /*BaseIsVirtual=*/false,
                                      /*OffsetInLayoutClass=*/CharUnits::Zero());
diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp
index 7e636ed1f5889..576f86516017b 100644
--- a/clang/lib/Analysis/BodyFarm.cpp
+++ b/clang/lib/Analysis/BodyFarm.cpp
@@ -293,7 +293,7 @@ static CallExpr *create_call_once_lambda_call(ASTContext &C, ASTMaker M,
 
   return CXXOperatorCallExpr::Create(
       /*AstContext=*/C, OO_Call, callOperatorDeclRef,
-      /*args=*/CallArgs,
+      /*Args=*/CallArgs,
       /*QualType=*/C.VoidTy,
       /*ExprValueType=*/VK_RValue,
       /*SourceLocation=*/SourceLocation(), FPOptions());
@@ -465,10 +465,10 @@ static Stmt *create_call_once(ASTContext &C, const FunctionDecl *D) {
   auto *Out =
       IfStmt::Create(C, SourceLocation(),
                      /* IsConstexpr=*/false,
-                     /* init=*/nullptr,
-                     /* var=*/nullptr,
-                     /* cond=*/FlagCheck,
-                     /* then=*/M.makeCompound({CallbackCall, FlagAssignment}));
+                     /* Init=*/nullptr,
+                     /* Var=*/nullptr,
+                     /* Cond=*/FlagCheck,
+                     /* Then=*/M.makeCompound({CallbackCall, FlagAssignment}));
 
   return Out;
 }
@@ -511,7 +511,7 @@ static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) {
   CallExpr *CE = CallExpr::Create(
       /*ASTContext=*/C,
       /*StmtClass=*/M.makeLvalueToRvalue(/*Expr=*/Block),
-      /*args=*/None,
+      /*Args=*/None,
       /*QualType=*/C.VoidTy,
       /*ExprValueType=*/VK_RValue,
       /*SourceLocation=*/SourceLocation());
@@ -549,10 +549,10 @@ static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) {
   // (5) Create the 'if' statement.
   auto *If = IfStmt::Create(C, SourceLocation(),
                             /* IsConstexpr=*/false,
-                            /* init=*/nullptr,
-                            /* var=*/nullptr,
-                            /* cond=*/GuardCondition,
-                            /* then=*/CS);
+                            /* Init=*/nullptr,
+                            /* Var=*/nullptr,
+                            /* Cond=*/GuardCondition,
+                            /* Then=*/CS);
   return If;
 }
 
@@ -657,8 +657,8 @@ static Stmt *create_OSAtomicCompareAndSwap(ASTContext &C, const FunctionDecl *D)
   /// Construct the If.
   auto *If = IfStmt::Create(C, SourceLocation(),
                             /* IsConstexpr=*/false,
-                            /* init=*/nullptr,
-                            /* var=*/nullptr, Comparison, Body,
+                            /* Init=*/nullptr,
+                            /* Var=*/nullptr, Comparison, Body,
                             SourceLocation(), Else);
 
   return If;
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index f92d311111e7b..de89105a29257 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -2931,8 +2931,8 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) {
 
     // Add the successors.  If we know that specific branches are
     // unreachable, inform addSuccessor() of that knowledge.
-    addSuccessor(Block, ThenBlock, /* isReachable = */ !KnownVal.isFalse());
-    addSuccessor(Block, ElseBlock, /* isReachable = */ !KnownVal.isTrue());
+    addSuccessor(Block, ThenBlock, /* IsReachable = */ !KnownVal.isFalse());
+    addSuccessor(Block, ElseBlock, /* IsReachable = */ !KnownVal.isTrue());
 
     // Add the condition as the last statement in the new block.  This may
     // create new blocks as the condition may contain control-flow.  Any newly
diff --git a/clang/lib/Basic/FixedPoint.cpp b/clang/lib/Basic/FixedPoint.cpp
index f049e6f64a502..05600dfc6d212 100644
--- a/clang/lib/Basic/FixedPoint.cpp
+++ b/clang/lib/Basic/FixedPoint.cpp
@@ -190,12 +190,12 @@ void APFixedPoint::toString(llvm::SmallVectorImpl<char> &Str) const {
   llvm::APInt FractPartMask = llvm::APInt::getAllOnesValue(Scale).zext(Width);
   llvm::APInt RadixInt = llvm::APInt(Width, 10);
 
-  IntPart.toString(Str, /*radix=*/10);
+  IntPart.toString(Str, /*Radix=*/10);
   Str.push_back('.');
   do {
     (FractPart * RadixInt)
         .lshr(Scale)
-        .toString(Str, /*radix=*/10, Val.isSigned());
+        .toString(Str, /*Radix=*/10, Val.isSigned());
     FractPart = (FractPart * RadixInt) & FractPartMask;
   } while (FractPart != 0);
 }
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 7a26ae2fdfab8..68c8c641139f2 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -263,7 +263,7 @@ class CGBuilderTy : public CGBuilderBaseTy {
         Addr.getElementType(), Addr.getPointer(), Idx0, Idx1, Name));
     llvm::APInt Offset(
         DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0,
-        /*IsSigned=*/true);
+        /*isSigned=*/true);
     if (!GEP->accumulateConstantOffset(DL, Offset))
       llvm_unreachable("offset of GEP with constants is always computable");
     return Address(GEP, Addr.getAlignment().alignmentAtOffset(
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0a75c96a74b9c..c58d1018fa0ec 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -589,7 +589,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
       auto DIter = LocalDeclMap.find(D);
       assert(DIter != LocalDeclMap.end());
 
-      return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
+      return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
                               getContext().getSizeType(), E->getBeginLoc());
     }
   }
@@ -719,7 +719,7 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
       llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
 
   llvm::InlineAsm *IA =
-      llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+      llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
   return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
 }
 
@@ -1063,7 +1063,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
     }
     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
     llvm::InlineAsm *IA =
-        llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+        llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
         getLLVMContext(), llvm::AttributeList::FunctionIndex,
         llvm::Attribute::NoReturn);
@@ -5999,9 +5999,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
 
     llvm::InlineAsm *Emit =
         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
-                                 /*SideEffects=*/true)
+                                 /*hasSideEffects=*/true)
                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
-                                 /*SideEffects=*/true);
+                                 /*hasSideEffects=*/true);
 
     return Builder.CreateCall(Emit);
   }
@@ -12120,7 +12120,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // This syscall signals a driver assertion failure in x86 NT kernels.
     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
     llvm::InlineAsm *IA =
-        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
+        llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
     llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
         getLLVMContext(), llvm::AttributeList::FunctionIndex,
         llvm::Attribute::NoReturn);
diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp
index adaeacfe868e0..6d903a0d09e23 100644
--- a/clang/lib/CodeGen/CGCXX.cpp
+++ b/clang/lib/CodeGen/CGCXX.cpp
@@ -239,7 +239,7 @@ llvm::FunctionCallee CodeGenModule::getAddrAndTypeOfCXXStructor(
 
   llvm::Constant *Ptr = GetOrCreateLLVMFunction(
       getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer,
-      /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition);
+      /*IsThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition);
   return {FnType, Ptr};
 }
 
diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp
index 9c3973fb9f1c4..041c0f8959fd7 100644
--- a/clang/lib/CodeGen/CGCXXABI.cpp
+++ b/clang/lib/CodeGen/CGCXXABI.cpp
@@ -291,7 +291,7 @@ llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage(
     GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const {
   // Delegate back to CGM by default.
   return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage,
-                                         /*isConstantVariable=*/false);
+                                         /*IsConstantVariable=*/false);
 }
 
 bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) {
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 705998d3b2161..5f1fb10074829 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1810,7 +1810,7 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
 void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
   llvm::AttrBuilder FuncAttrs;
   ConstructDefaultFnAttrList(F.getName(), F.hasOptNone(),
-                             /* AttrOnCallsite = */ false, FuncAttrs);
+                             /* AttrOnCallSite = */ false, FuncAttrs);
   F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs);
 }
 
@@ -2490,7 +2490,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         assert(NumIRArgs == 1);
         auto AI = FnArgs[FirstIRArg];
         AI->setName(Arg->getName() + ".coerce");
-        CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, *this);
+        CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
       }
 
       // Match to what EmitParmDecl is expecting for this type.
@@ -3537,7 +3537,7 @@ RValue CallArg::getRValue(CodeGenFunction &CGF) const {
 void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const {
   LValue Dst = CGF.MakeAddrLValue(Addr, Ty);
   if (!HasLV && RV.isScalar())
-    CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*init=*/true);
+    CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*isInit=*/true);
   else if (!HasLV && RV.isComplex())
     CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true);
   else {
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index c529c4d900c97..aee5a927a055b 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -405,7 +405,7 @@ struct CallCoroEnd final : public EHScopeStack::Cleanup {
     if (Bundles.empty()) {
       // Otherwise, (landingpad model), create a conditional branch that leads
       // either to a cleanup block or a block with EH resume instruction.
-      auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true);
+      auto *ResumeBB = CGF.getEHResumeBlock(/*isCleanup=*/true);
       auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont");
       CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB);
       CGF.EmitBlock(CleanupContBB);
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 739b2d858cb2e..19a9e75cc5ac9 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -176,7 +176,7 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
       return;
 
     llvm::GlobalValue::LinkageTypes Linkage =
-        CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);
+        CGM.getLLVMLinkageVarDefinition(&D, /*IsConstant=*/false);
 
     // FIXME: We need to force the emission/use of a guard variable for
     // some variables even if we can constant-evaluate them because
diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp
index 748029b860960..3b7a88a0b7693 100644
--- a/clang/lib/CodeGen/CGException.cpp
+++ b/clang/lib/CodeGen/CGException.cpp
@@ -32,7 +32,7 @@ static llvm::FunctionCallee getFreeExceptionFn(CodeGenModule &CGM) {
   // void __cxa_free_exception(void *thrown_exception);
 
   llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false);
 
   return CGM.CreateRuntimeFunction(FTy, "__cxa_free_exception");
 }
@@ -41,7 +41,7 @@ static llvm::FunctionCallee getUnexpectedFn(CodeGenModule &CGM) {
   // void __cxa_call_unexpected(void *thrown_exception);
 
   llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false);
 
   return CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected");
 }
@@ -50,7 +50,7 @@ llvm::FunctionCallee CodeGenModule::getTerminateFn() {
   // void __terminate();
 
   llvm::FunctionType *FTy =
-    llvm::FunctionType::get(VoidTy, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(VoidTy, /*isVarArg=*/false);
 
   StringRef name;
 
@@ -75,7 +75,7 @@ llvm::FunctionCallee CodeGenModule::getTerminateFn() {
 static llvm::FunctionCallee getCatchallRethrowFn(CodeGenModule &CGM,
                                                  StringRef Name) {
   llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false);
 
   return CGM.CreateRuntimeFunction(FTy, Name);
 }
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 62d930ca8c455..8340f48abcb64 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2031,7 +2031,7 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
 
   // Cast the source to the storage type and shift it into place.
   SrcVal = Builder.CreateIntCast(SrcVal, Ptr.getElementType(),
-                                 /*IsSigned=*/false);
+                                 /*isSigned=*/false);
   llvm::Value *MaskedVal = SrcVal;
 
   // See if there are other bits in the bitfield's storage we'll need to load
@@ -2611,7 +2611,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
     // some reason; most likely, because it's in an outer function.
     } else if (VD->isStaticLocal()) {
       addr = Address(CGM.getOrCreateStaticVarDecl(
-          *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)),
+          *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)),
                      getContext().getDeclAlign(VD));
 
     // No other cases for now.
@@ -3749,7 +3749,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
       Idx = Builder.CreateNSWMul(Idx, NumElements);
     EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(),
                                    !getLangOpts().isSignedOverflowDefined(),
-                                   /*SignedIndices=*/false, E->getExprLoc());
+                                   /*signedIndices=*/false, E->getExprLoc());
   } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) {
     // If this is A[i] where A is an array, the frontend will have decayed the
     // base to be a ArrayToPointerDecay implicit cast.  While correct, it is
@@ -3769,7 +3769,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
     EltPtr = emitArraySubscriptGEP(
         *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
         ResultExprTy, !getLangOpts().isSignedOverflowDefined(),
-        /*SignedIndices=*/false, E->getExprLoc());
+        /*signedIndices=*/false, E->getExprLoc());
     BaseInfo = ArrayLV.getBaseInfo();
     TBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, ResultExprTy);
   } else {
@@ -3778,7 +3778,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E,
                                            IsLowerBound);
     EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy,
                                    !getLangOpts().isSignedOverflowDefined(),
-                                   /*SignedIndices=*/false, E->getExprLoc());
+                                   /*signedIndices=*/false, E->getExprLoc());
   }
 
   return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo, TBAAInfo);
@@ -4867,7 +4867,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
                E->getDirectCallee(), /*ParamsToSkip*/ 0, Order);
 
   const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall(
-      Args, FnType, /*isChainCall=*/Chain);
+      Args, FnType, /*ChainCall=*/Chain);
 
   // C99 6.5.2.2p6:
   //   If the expression that denotes the called function has a type
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 25b0abbc03045..8ad229fc0c362 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -1277,7 +1277,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
   CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl));
   RValue RV =
       CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(
-                       Args, CalleeType, /*chainCall=*/false),
+                       Args, CalleeType, /*ChainCall=*/false),
                    Callee, ReturnValueSlot(), Args, &CallOrInvoke);
 
   /// C++1y [expr.new]p10:
diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp
index cc5c463224a14..31cf2aef1ba0b 100644
--- a/clang/lib/CodeGen/CGExprConstant.cpp
+++ b/clang/lib/CodeGen/CGExprConstant.cpp
@@ -1878,7 +1878,7 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) {
 
         if (VD->isLocalVarDecl()) {
           return CGM.getOrCreateStaticVarDecl(
-              *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false));
+              *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false));
         }
       }
     }
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 37d8bd08db991..1dd7ec52230ee 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -3735,7 +3735,7 @@ void CodeGenModule::emitAtAvailableLinkGuard() {
   llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false);
   llvm::FunctionCallee CFLinkCheckFuncRef = CreateRuntimeFunction(
       CheckFTy, "__clang_at_available_requires_core_foundation_framework",
-      llvm::AttributeList(), /*IsLocal=*/true);
+      llvm::AttributeList(), /*Local=*/true);
   llvm::Function *CFLinkCheckFunc =
       cast<llvm::Function>(CFLinkCheckFuncRef.getCallee()->stripPointerCasts());
   if (CFLinkCheckFunc->empty()) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e3a9c0b0d63b4..27e7175da841f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2898,7 +2898,7 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
       getThreadID(CGF, SourceLocation()),
       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
-                                /*IsSigned=*/false),
+                                /*isSigned=*/false),
       getOrCreateInternalVariable(
           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
   return Address(
@@ -5254,7 +5254,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
       if (const auto *ASE =
               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
         LValue UpAddrLVal =
-            CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
+            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
         llvm::Value *UpAddr =
             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
         llvm::Value *LowIntPtr =
@@ -6293,7 +6293,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
     LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
     if (DelayedCreation) {
       CGF.EmitStoreOfScalar(
-          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
+          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
           FlagsLVal);
     } else
       CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
@@ -6649,7 +6649,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
               CGF.EmitScalarExpr(NumTeams,
                                  /*IgnoreResultAssign*/ true);
           return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
-                                   /*IsSigned=*/true);
+                                   /*isSigned=*/true);
         }
         return Bld.getInt32(0);
       }
@@ -6673,7 +6673,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
           CGF.EmitScalarExpr(NumTeams,
                              /*IgnoreResultAssign*/ true);
       return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
-                               /*IsSigned=*/true);
+                               /*isSigned=*/true);
     }
     return Bld.getInt32(0);
   }
@@ -6801,7 +6801,7 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
         }
         NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
         NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
-                                               /*IsSigned=*/false);
+                                               /*isSigned=*/false);
         if (DefaultThreadLimitVal)
           NumThreads = CGF.Builder.CreateSelect(
               CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
@@ -6875,7 +6875,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
         llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
             ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
         ThreadLimitVal =
-            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
+            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
       }
       if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
           !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
@@ -6902,7 +6902,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
       ThreadLimitVal =
-          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
+          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
     }
     const CapturedStmt *CS = D.getInnermostCapturedStmt();
     if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
@@ -6925,7 +6925,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
       ThreadLimitVal =
-          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
+          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
     }
     return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
   case OMPD_target_parallel:
@@ -6963,7 +6963,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
       llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
           ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
       ThreadLimitVal =
-          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false);
+          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
     }
     if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
       CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
@@ -6971,7 +6971,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
       llvm::Value *NumThreads = CGF.EmitScalarExpr(
           NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
       NumThreadsVal =
-          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false);
+          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
       ThreadLimitVal = ThreadLimitVal
                            ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                 ThreadLimitVal),
@@ -7865,7 +7865,7 @@ class MappableExprsHandler {
     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
-                                                  /*isSinged=*/false);
+                                                  /*isSigned=*/false);
     Sizes.push_back(Size);
     // Map type is always TARGET_PARAM
     Types.push_back(OMP_MAP_TARGET_PARAM);
@@ -8416,7 +8416,7 @@ class MappableExprsHandler {
         CGF.Builder.CreateMemCpy(
             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
-            CurSizes.back(), /*isVolatile=*/false);
+            CurSizes.back(), /*IsVolatile=*/false);
         // Use new global variable as the base pointers.
         CurBasePointers.push_back(Addr);
         CurPointers.push_back(Addr);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index a5396a362f222..e8fbca5108ade 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3603,7 +3603,7 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
     CGF.EmitAtomicStore(RVal, LVal,
                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
                                  : llvm::AtomicOrdering::Monotonic,
-                        LVal.isVolatile(), /*IsInit=*/false);
+                        LVal.isVolatile(), /*isInit=*/false);
   }
 }
 
@@ -4095,7 +4095,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
     // Emit calculation of the iterations count.
     llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
     NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
-                                              /*IsSigned=*/false);
+                                              /*isSigned=*/false);
     return NumIterations;
   };
   if (IsOffloadEntry)
diff --git a/clang/lib/CodeGen/CodeGenABITypes.cpp b/clang/lib/CodeGen/CodeGenABITypes.cpp
index c047587dc00c1..6b6a116cf259b 100644
--- a/clang/lib/CodeGen/CodeGenABITypes.cpp
+++ b/clang/lib/CodeGen/CodeGenABITypes.cpp
@@ -59,7 +59,7 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM,
                                  FunctionType::ExtInfo info,
                                  RequiredArgs args) {
   return CGM.getTypes().arrangeLLVMFunctionInfo(
-      returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes,
+      returnType, /*instanceMethod=*/false, /*chainCall=*/false, argTypes,
       info, {}, args);
 }
 
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 7ab960e8bcee5..6ff72ec045e62 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1199,7 +1199,7 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) {
     return llvm::GlobalValue::InternalLinkage;
   }
 
-  return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false);
+  return getLLVMLinkageForDeclarator(D, Linkage, /*IsConstantVariable=*/false);
 }
 
 llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index d900c7b238378..6d18027f16a80 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -1281,7 +1281,7 @@ std::string getCoverageSection(const CodeGenModule &CGM) {
 std::string normalizeFilename(StringRef Filename) {
   llvm::SmallString<256> Path(Filename);
   llvm::sys::fs::make_absolute(Path);
-  llvm::sys::path::remove_dots(Path, /*remove_dot_dots=*/true);
+  llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
   return Path.str().str();
 }
 
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index cb22239559ad1..7367ff37cf45c 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -1138,7 +1138,7 @@ void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
   // void __cxa_rethrow();
 
   llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
 
   llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow");
 
@@ -1152,7 +1152,7 @@ static llvm::FunctionCallee getAllocateExceptionFn(CodeGenModule &CGM) {
   // void *__cxa_allocate_exception(size_t thrown_size);
 
   llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*isVarArg=*/false);
 
   return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception");
 }
@@ -1163,7 +1163,7 @@ static llvm::FunctionCallee getThrowFn(CodeGenModule &CGM) {
 
   llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy };
   llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(CGM.VoidTy, Args, /*isVarArg=*/false);
 
   return CGM.CreateRuntimeFunction(FTy, "__cxa_throw");
 }
@@ -2402,7 +2402,7 @@ static bool isThreadWrapperReplaceable(const VarDecl *VD,
 static llvm::GlobalValue::LinkageTypes
 getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) {
   llvm::GlobalValue::LinkageTypes VarLinkage =
-      CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false);
+      CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
 
   // For internal linkage variables, we don't need an external or weak wrapper.
   if (llvm::GlobalValue::isLocalLinkage(VarLinkage))
@@ -2780,7 +2780,7 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
     // RTTI, check if emitting vtables opportunistically need any adjustment.
 
     GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
-                                  /*Constant=*/true,
+                                  /*isConstant=*/true,
                                   llvm::GlobalValue::ExternalLinkage, nullptr,
                                   Name);
     const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl();
@@ -3385,7 +3385,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
   llvm::GlobalVariable *OldGV = M.getNamedGlobal(Name);
   llvm::GlobalVariable *GV =
       new llvm::GlobalVariable(M, Init->getType(),
-                               /*Constant=*/true, Linkage, Init, Name);
+                               /*isConstant=*/true, Linkage, Init, Name);
 
   // If there's already an old global variable, replace it with the new one.
   if (OldGV) {
@@ -3906,7 +3906,7 @@ void ItaniumCXXABI::emitCXXStructor(GlobalDecl GD) {
 static llvm::FunctionCallee getBeginCatchFn(CodeGenModule &CGM) {
   // void *__cxa_begin_catch(void*);
   llvm::FunctionType *FTy = llvm::FunctionType::get(
-      CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+      CGM.Int8PtrTy, CGM.Int8PtrTy, /*isVarArg=*/false);
 
   return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch");
 }
@@ -3914,7 +3914,7 @@ static llvm::FunctionCallee getBeginCatchFn(CodeGenModule &CGM) {
 static llvm::FunctionCallee getEndCatchFn(CodeGenModule &CGM) {
   // void __cxa_end_catch();
   llvm::FunctionType *FTy =
-      llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
+      llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
 
   return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch");
 }
@@ -3922,7 +3922,7 @@ static llvm::FunctionCallee getEndCatchFn(CodeGenModule &CGM) {
 static llvm::FunctionCallee getGetExceptionPtrFn(CodeGenModule &CGM) {
   // void *__cxa_get_exception_ptr(void*);
   llvm::FunctionType *FTy = llvm::FunctionType::get(
-      CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+      CGM.Int8PtrTy, CGM.Int8PtrTy, /*isVarArg=*/false);
 
   return CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr");
 }
@@ -4196,9 +4196,9 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
 /// This code is used only in C++.
 static llvm::FunctionCallee getClangCallTerminateFn(CodeGenModule &CGM) {
   llvm::FunctionType *fnTy =
-    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false);
   llvm::FunctionCallee fnRef = CGM.CreateRuntimeFunction(
-      fnTy, "__clang_call_terminate", llvm::AttributeList(), /*IsLocal=*/true);
+      fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true);
   llvm::Function *fn =
       cast<llvm::Function>(fnRef.getCallee()->stripPointerCasts());
   if (fn->empty()) {
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index c37bfe3a59440..a91a949d024f8 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -352,7 +352,7 @@ class MicrosoftCXXABI : public CGCXXABI {
             ? llvm::GlobalValue::LinkOnceODRLinkage
             : llvm::GlobalValue::InternalLinkage;
     auto *VDispMap = new llvm::GlobalVariable(
-        CGM.getModule(), VDispMapTy, /*Constant=*/true, Linkage,
+        CGM.getModule(), VDispMapTy, /*isConstant=*/true, Linkage,
         /*Initializer=*/Init, MangledName);
     return VDispMap;
   }
@@ -436,7 +436,7 @@ class MicrosoftCXXABI : public CGCXXABI {
   friend struct MSRTTIBuilder;
 
   bool isImageRelative() const {
-    return CGM.getTarget().getPointerWidth(/*AddressSpace=*/0) == 64;
+    return CGM.getTarget().getPointerWidth(/*AddrSpace=*/0) == 64;
   }
 
   // 5 routines for constructing the llvm types for MS RTTI structs.
@@ -730,7 +730,7 @@ class MicrosoftCXXABI : public CGCXXABI {
     // which describes the exception.
     llvm::Type *Args[] = {CGM.Int8PtrTy, getThrowInfoType()->getPointerTo()};
     llvm::FunctionType *FTy =
-        llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
+        llvm::FunctionType::get(CGM.VoidTy, Args, /*isVarArg=*/false);
     llvm::FunctionCallee Throw =
         CGM.CreateRuntimeFunction(FTy, "_CxxThrowException");
     // _CxxThrowException is stdcall on 32-bit x86 platforms.
@@ -1313,7 +1313,7 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage(
     // The base destructor most closely tracks the user-declared constructor, so
     // we delegate back to the normal declarator case.
     return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage,
-                                           /*isConstantVariable=*/false);
+                                           /*IsConstantVariable=*/false);
   case Dtor_Complete:
     // The complete destructor is like an inline function, but it may be
     // imported and therefore must be exported as well. This requires changing
@@ -2250,7 +2250,7 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD,
 
   // extern "C" int __tlregdtor(void (*f)(void));
   llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get(
-      CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false);
+      CGF.IntTy, DtorStub->getType(), /*isVarArg=*/false);
 
   llvm::FunctionCallee TLRegDtor = CGF.CGM.CreateRuntimeFunction(
       TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true);
@@ -2291,7 +2291,7 @@ void MicrosoftCXXABI::EmitThreadLocalInitFuncs(
   // pointers at start-up time and, eventually, at thread-creation time.
   auto AddToXDU = [&CGM](llvm::Function *InitFunc) {
     llvm::GlobalVariable *InitFuncPtr = new llvm::GlobalVariable(
-        CGM.getModule(), InitFunc->getType(), /*IsConstant=*/true,
+        CGM.getModule(), InitFunc->getType(), /*isConstant=*/true,
         llvm::GlobalVariable::InternalLinkage, InitFunc,
         Twine(InitFunc->getName(), "$initializer$"));
     InitFuncPtr->setSection(".CRT$XDU");
@@ -2340,7 +2340,7 @@ static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) {
     return ConstantAddress(GV, Align);
   auto *GV = new llvm::GlobalVariable(
       CGM.getModule(), CGM.IntTy,
-      /*Constant=*/false, llvm::GlobalVariable::ExternalLinkage,
+      /*isConstant=*/false, llvm::GlobalVariable::ExternalLinkage,
       /*Initializer=*/nullptr, VarName,
       /*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel);
   GV->setAlignment(Align.getQuantity());
@@ -3400,7 +3400,7 @@ static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) {
   if (auto VTable = CGM.getModule().getNamedGlobal(MangledName))
     return VTable;
   return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
-                                  /*Constant=*/true,
+                                  /*isConstant=*/true,
                                   llvm::GlobalVariable::ExternalLinkage,
                                   /*Initializer=*/nullptr, MangledName);
 }
@@ -3580,7 +3580,7 @@ llvm::GlobalVariable *MSRTTIBuilder::getClassHierarchyDescriptor() {
 
   // Forward-declare the class hierarchy descriptor
   auto Type = ABI.getClassHierarchyDescriptorType();
-  auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+  auto CHD = new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage,
                                       /*Initializer=*/nullptr,
                                       MangledName);
   if (CHD->isWeakForLinker())
@@ -3619,7 +3619,7 @@ MSRTTIBuilder::getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes) {
   auto *ArrType = llvm::ArrayType::get(PtrType, Classes.size() + 1);
   auto *BCA =
       new llvm::GlobalVariable(Module, ArrType,
-                               /*Constant=*/true, Linkage,
+                               /*isConstant=*/true, Linkage,
                                /*Initializer=*/nullptr, MangledName);
   if (BCA->isWeakForLinker())
     BCA->setComdat(CGM.getModule().getOrInsertComdat(BCA->getName()));
@@ -3661,7 +3661,7 @@ MSRTTIBuilder::getBaseClassDescriptor(const MSRTTIClass &Class) {
   // Forward-declare the base class descriptor.
   auto Type = ABI.getBaseClassDescriptorType();
   auto BCD =
-      new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+      new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage,
                                /*Initializer=*/nullptr, MangledName);
   if (BCD->isWeakForLinker())
     BCD->setComdat(CGM.getModule().getOrInsertComdat(BCD->getName()));
@@ -3707,7 +3707,7 @@ MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo &Info) {
 
   // Forward-declare the complete object locator.
   llvm::StructType *Type = ABI.getCompleteObjectLocatorType();
-  auto COL = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+  auto COL = new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage,
     /*Initializer=*/nullptr, MangledName);
 
   // Initialize the CompleteObjectLocator.
@@ -3822,7 +3822,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
   llvm::StructType *TypeDescriptorType =
       getTypeDescriptorType(TypeInfoString);
   auto *Var = new llvm::GlobalVariable(
-      CGM.getModule(), TypeDescriptorType, /*Constant=*/false,
+      CGM.getModule(), TypeDescriptorType, /*isConstant=*/false,
       getLinkageForRTTI(Type),
       llvm::ConstantStruct::get(TypeDescriptorType, Fields),
       MangledName);
@@ -4061,7 +4061,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
   };
   llvm::StructType *CTType = getCatchableTypeType();
   auto *GV = new llvm::GlobalVariable(
-      CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T),
+      CGM.getModule(), CTType, /*isConstant=*/true, getLinkageForRTTI(T),
       llvm::ConstantStruct::get(CTType, Fields), MangledName);
   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
   GV->setSection(".xdata");
@@ -4179,7 +4179,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
     getMangleContext().mangleCXXCatchableTypeArray(T, NumEntries, Out);
   }
   CTA = new llvm::GlobalVariable(
-      CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T),
+      CGM.getModule(), CTAType, /*isConstant=*/true, getLinkageForRTTI(T),
       llvm::ConstantStruct::get(CTAType, Fields), MangledName);
   CTA->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
   CTA->setSection(".xdata");
@@ -4248,7 +4248,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
       PointerToCatchableTypes                   // CatchableTypeArray
   };
   auto *GV = new llvm::GlobalVariable(
-      CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T),
+      CGM.getModule(), TIType, /*isConstant=*/true, getLinkageForRTTI(T),
       llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName));
   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
   GV->setSection(".xdata");
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 458b9a6025bfe..5da988fb8a3c5 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -833,7 +833,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
 
 Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                       QualType Ty) const {
-  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false,
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/ false,
                           getContext().getTypeInfoInChars(Ty),
                           CharUnits::fromQuantity(4),
                           /*AllowHigherAlign=*/ true);
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 087335562d0ae..396ddf4dd8162 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4023,9 +4023,9 @@ InputInfo Driver::BuildJobsForActionNoCache(
     Input.claim();
     if (Input.getOption().matches(options::OPT_INPUT)) {
       const char *Name = Input.getValue();
-      return InputInfo(A, Name, /* BaseInput = */ Name);
+      return InputInfo(A, Name, /* _BaseInput = */ Name);
     }
-    return InputInfo(A, &Input, /* BaseInput = */ "");
+    return InputInfo(A, &Input, /* _BaseInput = */ "");
   }
 
   if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 6c3074b69e9f9..edc64581172ff 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2971,7 +2971,7 @@ static void RenderObjCOptions(const ToolChain &TC, const Driver &D,
     // We default off for Objective-C, on for Objective-C++.
     if (Args.hasFlag(options::OPT_fobjc_arc_exceptions,
                      options::OPT_fno_objc_arc_exceptions,
-                     /*default=*/types::isCXX(Input.getType())))
+                     /*Default=*/types::isCXX(Input.getType())))
       CmdArgs.push_back("-fobjc-arc-exceptions");
   }
 
@@ -5702,7 +5702,7 @@ static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) {
   // The default is that /GX is not specified.
   if (EHArgs.empty() &&
       Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_,
-                   /*default=*/false)) {
+                   /*Default=*/false)) {
     EH.Synch = true;
     EH.NoUnwindC = true;
   }
@@ -5771,13 +5771,13 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
 
   // This controls whether or not we emit RTTI data for polymorphic types.
   if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
-                   /*default=*/false))
+                   /*Default=*/false))
     CmdArgs.push_back("-fno-rtti-data");
 
   // This controls whether or not we emit stack-protector instrumentation.
   // In MSVC, Buffer Security Check (/GS) is on by default.
   if (Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_,
-                   /*default=*/true)) {
+                   /*Default=*/true)) {
     CmdArgs.push_back("-stack-protector");
     CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong)));
   }
diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index 3a789627c5519..6ed80a8f47523 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -626,11 +626,11 @@ std::unique_ptr<Command> visualstudio::Compiler::GetCommand(
   // FIXME: How can we ensure this stays in sync with relevant clang-cl options?
 
   if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR,
-                   /*default=*/false))
+                   /*Default=*/false))
     CmdArgs.push_back("/GR-");
 
   if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS,
-                   /*default=*/false))
+                   /*Default=*/false))
     CmdArgs.push_back("/GS-");
 
   if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections,
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index 36a18de017868..3f3c80bc1ccf1 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -833,7 +833,7 @@ class NoLineBreakFormatter : public LineFormatter {
     LineState State =
         Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun);
     while (State.NextToken) {
-      formatChildren(State, /*Newline=*/false, DryRun, Penalty);
+      formatChildren(State, /*NewLine=*/false, DryRun, Penalty);
       Indenter->addTokenToState(
           State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun);
     }
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 7d54d665146b6..e37afae5332a0 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -139,7 +139,7 @@ GeneratePCHAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile,
   std::unique_ptr<raw_pwrite_stream> OS =
       CI.createOutputFile(CI.getFrontendOpts().OutputFile, /*Binary=*/true,
                           /*RemoveFileOnSignal=*/false, InFile,
-                          /*Extension=*/"", /*useTemporary=*/true);
+                          /*Extension=*/"", /*UseTemporary=*/true);
   if (!OS)
     return nullptr;
 
@@ -215,7 +215,7 @@ GenerateModuleFromModuleMapAction::CreateOutputFile(CompilerInstance &CI,
   // We use a temporary to avoid race conditions.
   return CI.createOutputFile(CI.getFrontendOpts().OutputFile, /*Binary=*/true,
                              /*RemoveFileOnSignal=*/false, InFile,
-                             /*Extension=*/"", /*useTemporary=*/true,
+                             /*Extension=*/"", /*UseTemporary=*/true,
                              /*CreateMissingDirectories=*/true);
 }
 
diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index 170149d5053ff..bd091ee033512 100644
--- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -2428,7 +2428,7 @@ void RewriteModernObjC::SynthMsgSendFunctionDecl() {
   assert(!argT.isNull() && "Can't find 'SEL' type");
   ArgTys.push_back(argT);
   QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(),
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                              SourceLocation(),
                                              SourceLocation(),
@@ -2442,7 +2442,7 @@ void RewriteModernObjC::SynthMsgSendSuperFunctionDecl() {
   SmallVector<QualType, 2> ArgTys;
   ArgTys.push_back(Context->VoidTy);
   QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(),
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendSuperFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                                   SourceLocation(),
                                                   SourceLocation(),
@@ -2461,7 +2461,7 @@ void RewriteModernObjC::SynthMsgSendStretFunctionDecl() {
   assert(!argT.isNull() && "Can't find 'SEL' type");
   ArgTys.push_back(argT);
   QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(),
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendStretFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                                   SourceLocation(),
                                                   SourceLocation(),
@@ -2477,7 +2477,7 @@ void RewriteModernObjC::SynthMsgSendSuperStretFunctionDecl() {
   SmallVector<QualType, 2> ArgTys;
   ArgTys.push_back(Context->VoidTy);
   QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(),
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendSuperStretFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                                        SourceLocation(),
                                                        SourceLocation(),
@@ -2497,7 +2497,7 @@ void RewriteModernObjC::SynthMsgSendFpretFunctionDecl() {
   assert(!argT.isNull() && "Can't find 'SEL' type");
   ArgTys.push_back(argT);
   QualType msgSendType = getSimpleFunctionType(Context->DoubleTy,
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendFpretFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                                   SourceLocation(),
                                                   SourceLocation(),
diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
index 2ff230dfff1b5..05078baee790c 100644
--- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -2335,7 +2335,7 @@ void RewriteObjC::SynthMsgSendFunctionDecl() {
   assert(!argT.isNull() && "Can't find 'SEL' type");
   ArgTys.push_back(argT);
   QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(),
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                              SourceLocation(),
                                              SourceLocation(),
@@ -2357,7 +2357,7 @@ void RewriteObjC::SynthMsgSendSuperFunctionDecl() {
   assert(!argT.isNull() && "Can't find 'SEL' type");
   ArgTys.push_back(argT);
   QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(),
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendSuperFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                                   SourceLocation(),
                                                   SourceLocation(),
@@ -2376,7 +2376,7 @@ void RewriteObjC::SynthMsgSendStretFunctionDecl() {
   assert(!argT.isNull() && "Can't find 'SEL' type");
   ArgTys.push_back(argT);
   QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(),
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendStretFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                                   SourceLocation(),
                                                   SourceLocation(),
@@ -2400,7 +2400,7 @@ void RewriteObjC::SynthMsgSendSuperStretFunctionDecl() {
   assert(!argT.isNull() && "Can't find 'SEL' type");
   ArgTys.push_back(argT);
   QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(),
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendSuperStretFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                                        SourceLocation(),
                                                        SourceLocation(),
@@ -2420,7 +2420,7 @@ void RewriteObjC::SynthMsgSendFpretFunctionDecl() {
   assert(!argT.isNull() && "Can't find 'SEL' type");
   ArgTys.push_back(argT);
   QualType msgSendType = getSimpleFunctionType(Context->DoubleTy,
-                                               ArgTys, /*isVariadic=*/true);
+                                               ArgTys, /*variadic=*/true);
   MsgSendFpretFunctionDecl = FunctionDecl::Create(*Context, TUDecl,
                                                   SourceLocation(),
                                                   SourceLocation(),
diff --git a/clang/lib/Index/IndexDecl.cpp b/clang/lib/Index/IndexDecl.cpp
index e41b5fecdd24b..5bbbb0d32bf45 100644
--- a/clang/lib/Index/IndexDecl.cpp
+++ b/clang/lib/Index/IndexDecl.cpp
@@ -416,7 +416,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
     if (D->isThisDeclarationADefinition()) {
       TRY_DECL(D, IndexCtx.handleDecl(D));
       TRY_TO(handleReferencedProtocols(D->getReferencedProtocols(), D,
-                                       /*superLoc=*/SourceLocation()));
+                                       /*SuperLoc=*/SourceLocation()));
       TRY_TO(IndexCtx.indexDeclContext(D));
     } else {
       return IndexCtx.handleReference(D, D->getLocation(), nullptr,
@@ -466,7 +466,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor<IndexingDeclVisitor, bool> {
       CategoryLoc = D->getLocation();
     TRY_TO(IndexCtx.handleDecl(D, CategoryLoc));
     TRY_TO(handleReferencedProtocols(D->getReferencedProtocols(), D,
-                                     /*superLoc=*/SourceLocation()));
+                                     /*SuperLoc=*/SourceLocation()));
     TRY_TO(IndexCtx.indexDeclContext(D));
     return true;
   }
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index ca94883ebecbb..108630cc26f69 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -539,7 +539,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
 
   FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end());
   const FileEntry *FE = FileMgr.getFile(FrameworkName,
-                                        /*openFile=*/!SuggestedModule);
+                                        /*OpenFile=*/!SuggestedModule);
   if (!FE) {
     // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h"
     const char *Private = "Private";
@@ -549,7 +549,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup(
       SearchPath->insert(SearchPath->begin()+OrigSize, Private,
                          Private+strlen(Private));
 
-    FE = FileMgr.getFile(FrameworkName, /*openFile=*/!SuggestedModule);
+    FE = FileMgr.getFile(FrameworkName, /*OpenFile=*/!SuggestedModule);
   }
 
   // If we found the header and are allowed to suggest a module, do so now.
@@ -1047,7 +1047,7 @@ LookupSubframeworkHeader(StringRef Filename,
   }
 
   HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end());
-  if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) {
+  if (!(FE = FileMgr.getFile(HeadersFilename, /*OpenFile=*/true))) {
     // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h"
     HeadersFilename = FrameworkName;
     HeadersFilename += "PrivateHeaders/";
@@ -1058,7 +1058,7 @@ LookupSubframeworkHeader(StringRef Filename,
     }
 
     HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end());
-    if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true)))
+    if (!(FE = FileMgr.getFile(HeadersFilename, /*OpenFile=*/true)))
       return nullptr;
   }
 
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 311c0e02fc696..2756042f23eb2 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -1419,7 +1419,7 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
 
   // Note that this macro has now been exported.
   appendMacroDirective(II, AllocateVisibilityMacroDirective(
-                                MacroNameTok.getLocation(), /*IsPublic=*/true));
+                                MacroNameTok.getLocation(), /*isPublic=*/true));
 }
 
 /// Handle a #private directive.
@@ -1446,7 +1446,7 @@ void Preprocessor::HandleMacroPrivateDirective() {
 
   // Note that this macro has now been marked private.
   appendMacroDirective(II, AllocateVisibilityMacroDirective(
-                               MacroNameTok.getLocation(), /*IsPublic=*/false));
+                               MacroNameTok.getLocation(), /*isPublic=*/false));
 }
 
 //===----------------------------------------------------------------------===//
@@ -1937,7 +1937,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
     // and making the module loader convert it back again.
     ModuleLoadResult Imported = TheModuleLoader.loadModule(
         IncludeTok.getLocation(), Path, Module::Hidden,
-        /*IsIncludeDirective=*/true);
+        /*IsInclusionDirective=*/true);
     assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
            "the imported module is different than the suggested one");
 
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index 5021ff8aac52d..687b9a9d3b7bd 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1707,7 +1707,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
 
         HasLexedNextToken = Tok.is(tok::string_literal);
         if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'",
-                                    /*MacroExpansion=*/false))
+                                    /*AllowMacroExpansion=*/false))
           return false;
 
         // FIXME: Should we accept "-R..." flags here, or should that be
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index e680e8d4bbd71..4e4db668551f8 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -1191,7 +1191,7 @@ struct PragmaDiagnosticHandler : public PragmaHandler {
 
     std::string WarningName;
     if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic",
-                                   /*MacroExpansion=*/false))
+                                   /*AllowMacroExpansion=*/false))
       return;
 
     if (Tok.isNot(tok::eod)) {
@@ -1389,7 +1389,7 @@ struct PragmaExecCharsetHandler : public PragmaHandler {
         std::string ExecCharset;
         if (!PP.FinishLexStringLiteral(Tok, ExecCharset,
                                        "pragma execution_character_set",
-                                       /*MacroExpansion=*/false))
+                                       /*AllowMacroExpansion=*/false))
           return;
 
         // MSVC supports either of these, but nothing else.
@@ -1490,7 +1490,7 @@ struct PragmaMessageHandler : public PragmaHandler {
 
     std::string MessageString;
     if (!PP.FinishLexStringLiteral(Tok, MessageString, PragmaKind(Kind),
-                                   /*MacroExpansion=*/true))
+                                   /*AllowMacroExpansion=*/true))
       return;
 
     if (ExpectClosingParen) {
@@ -1540,7 +1540,7 @@ struct PragmaModuleImportHandler : public PragmaHandler {
     // If we have a non-empty module path, load the named module.
     Module *Imported =
         PP.getModuleLoader().loadModule(ImportLoc, ModuleName, Module::Hidden,
-                                      /*IsIncludeDirective=*/false);
+                                      /*IsInclusionDirective=*/false);
     if (!Imported)
       return;
 
@@ -1666,7 +1666,7 @@ struct PragmaModuleLoadHandler : public PragmaHandler {
 
     // Load the module, don't make it visible.
     PP.getModuleLoader().loadModule(Loc, ModuleName, Module::Hidden,
-                                    /*IsIncludeDirective=*/false);
+                                    /*IsInclusionDirective=*/false);
   }
 };
 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index dd61938955699..bdc5fbcd2beab 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1263,7 +1263,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
     Imported = TheModuleLoader.loadModule(ModuleImportLoc,
                                           ModuleImportPath,
                                           Module::Hidden,
-                                          /*IsIncludeDirective=*/false);
+                                          /*IsInclusionDirective=*/false);
     if (Imported)
       makeModuleVisible(Imported, SemiLoc);
   }
diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp
index 14151efd3d2a8..a1abf8269c451 100644
--- a/clang/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp
@@ -1058,7 +1058,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks,
         case CIK_DefaultArgument:
           bool InvalidAsDeclaration = false;
           Result = TryParseParameterDeclarationClause(
-              &InvalidAsDeclaration, /*VersusTemplateArgument=*/true);
+              &InvalidAsDeclaration, /*VersusTemplateArg=*/true);
           // If this is an expression or a declaration with a missing
           // 'typename', assume it's not a declaration.
           if (Result == TPResult::Ambiguous && InvalidAsDeclaration)
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 0b57c8ab66345..97fd92bec7875 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -3189,7 +3189,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
           Actions.getTypeName(*Next.getIdentifierInfo(), Next.getLocation(),
                               getCurScope(), &SS, false, false, nullptr,
                               /*IsCtorOrDtorName=*/false,
-                              /*WantNonTrivialSourceInfo=*/true,
+                              /*WantNontrivialTypeSourceInfo=*/true,
                               isClassTemplateDeductionContext(DSContext));
 
       // If the referenced identifier is not a type, then this declspec is
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index 81a56a025048c..9c61c4da447aa 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -1208,9 +1208,9 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc,
   // We have an identifier; check whether it is actually a type.
   IdentifierInfo *CorrectedII = nullptr;
   ParsedType Type = Actions.getTypeName(
-      *Id, IdLoc, getCurScope(), &SS, /*IsClassName=*/true, false, nullptr,
+      *Id, IdLoc, getCurScope(), &SS, /*isClassName=*/true, false, nullptr,
       /*IsCtorOrDtorName=*/false,
-      /*NonTrivialTypeSourceInfo=*/true,
+      /*WantNontrivialTypeSourceInfo=*/true,
       /*IsClassTemplateDeductionContext*/ false, &CorrectedII);
   if (!Type) {
     Diag(IdLoc, diag::err_expected_class_name);
diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index b5c01fe60fb04..7a0c07bd3b04e 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -1770,7 +1770,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) {
       if (Tok.is(tok::code_completion)) {
         tok::TokenKind CorrectedOpKind =
             OpKind == tok::arrow ? tok::period : tok::arrow;
-        ExprResult CorrectedLHS(/*IsInvalid=*/true);
+        ExprResult CorrectedLHS(/*Invalid=*/true);
         if (getLangOpts().CPlusPlus && OrigLHS) {
           const bool DiagsAreSuppressed = Diags.getSuppressAllDiagnostics();
           Diags.setSuppressAllDiagnostics(true);
@@ -2052,7 +2052,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
   if (isCastExpr)
     return Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),
                                                  ExprKind,
-                                                 /*isType=*/true,
+                                                 /*IsType=*/true,
                                                  CastTy.getAsOpaquePtr(),
                                                  CastRange);
 
@@ -2063,7 +2063,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() {
   if (!Operand.isInvalid())
     Operand = Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(),
                                                     ExprKind,
-                                                    /*isType=*/false,
+                                                    /*IsType=*/false,
                                                     Operand.get(),
                                                     CastRange);
   return Operand;
@@ -3055,7 +3055,7 @@ ExprResult Parser::ParseBlockLiteralExpression() {
                                      /*IsAmbiguous=*/false,
                                      /*RParenLoc=*/NoLoc,
                                      /*ArgInfo=*/nullptr,
-                                     /*NumArgs=*/0,
+                                     /*NumParams=*/0,
                                      /*EllipsisLoc=*/NoLoc,
                                      /*RParenLoc=*/NoLoc,
                                      /*RefQualifierIsLvalueRef=*/true,
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 9d296f18527db..85c7e6c6bcdf9 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -1334,10 +1334,10 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
 
     SourceLocation NoLoc;
     D.AddTypeInfo(DeclaratorChunk::getFunction(
-                      /*hasProto=*/true,
-                      /*isAmbiguous=*/false, LParenLoc, ParamInfo.data(),
+                      /*HasProto=*/true,
+                      /*IsAmbiguous=*/false, LParenLoc, ParamInfo.data(),
                       ParamInfo.size(), EllipsisLoc, RParenLoc,
-                      /*RefQualifierIsLValueRef=*/true,
+                      /*RefQualifierIsLvalueRef=*/true,
                       /*RefQualifierLoc=*/NoLoc, MutableLoc, ESpecType,
                       ESpecRange, DynamicExceptions.data(),
                       DynamicExceptionRanges.data(), DynamicExceptions.size(),
@@ -1394,14 +1394,14 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
 
     SourceLocation NoLoc;
     D.AddTypeInfo(DeclaratorChunk::getFunction(
-                      /*hasProto=*/true,
-                      /*isAmbiguous=*/false,
+                      /*HasProto=*/true,
+                      /*IsAmbiguous=*/false,
                       /*LParenLoc=*/NoLoc,
                       /*Params=*/nullptr,
                       /*NumParams=*/0,
                       /*EllipsisLoc=*/NoLoc,
                       /*RParenLoc=*/NoLoc,
-                      /*RefQualifierIsLValueRef=*/true,
+                      /*RefQualifierIsLvalueRef=*/true,
                       /*RefQualifierLoc=*/NoLoc, MutableLoc, EST_None,
                       /*ESpecRange=*/SourceRange(),
                       /*Exceptions=*/nullptr,
@@ -1701,7 +1701,7 @@ Parser::ParseCXXPseudoDestructor(Expr *Base, SourceLocation OpLoc,
       ParseUnqualifiedIdTemplateId(SS, SourceLocation(),
                                    Name, NameLoc,
                                    false, ObjectType, SecondTypeName,
-                                   /*AssumeTemplateName=*/true))
+                                   /*AssumeTemplateId=*/true))
     return ExprError();
 
   return Actions.ActOnPseudoDestructorExpr(getCurScope(), Base, OpLoc, OpKind,
@@ -3061,7 +3061,7 @@ void Parser::ParseDirectNewDeclarator(Declarator &D) {
     MaybeParseCXX11Attributes(Attrs);
 
     D.AddTypeInfo(DeclaratorChunk::getArray(0,
-                                            /*static=*/false, /*star=*/false,
+                                            /*isStatic=*/false, /*isStar=*/false,
                                             Size.get(), T.getOpenLocation(),
                                             T.getCloseLocation()),
                   std::move(Attrs), T.getCloseLocation());
diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp
index a8d6f7fd60270..8937a0986c956 100644
--- a/clang/lib/Parse/ParseObjc.cpp
+++ b/clang/lib/Parse/ParseObjc.cpp
@@ -3194,15 +3194,15 @@ Parser::ParseObjCMessageExpressionBody(SourceLocation LBracLoc,
         if (SuperLoc.isValid())
           Actions.CodeCompleteObjCSuperMessage(getCurScope(), SuperLoc,
                                                KeyIdents,
-                                               /*AtArgumentEpression=*/true);
+                                               /*AtArgumentExpression=*/true);
         else if (ReceiverType)
           Actions.CodeCompleteObjCClassMessage(getCurScope(), ReceiverType,
                                                KeyIdents,
-                                               /*AtArgumentEpression=*/true);
+                                               /*AtArgumentExpression=*/true);
         else
           Actions.CodeCompleteObjCInstanceMessage(getCurScope(), ReceiverExpr,
                                                   KeyIdents,
-                                                  /*AtArgumentEpression=*/true);
+                                                  /*AtArgumentExpression=*/true);
 
         cutOffParsing();
         return ExprError();
@@ -3232,15 +3232,15 @@ Parser::ParseObjCMessageExpressionBody(SourceLocation LBracLoc,
         if (SuperLoc.isValid())
           Actions.CodeCompleteObjCSuperMessage(getCurScope(), SuperLoc,
                                                KeyIdents,
-                                               /*AtArgumentEpression=*/false);
+                                               /*AtArgumentExpression=*/false);
         else if (ReceiverType)
           Actions.CodeCompleteObjCClassMessage(getCurScope(), ReceiverType,
                                                KeyIdents,
-                                               /*AtArgumentEpression=*/false);
+                                               /*AtArgumentExpression=*/false);
         else
           Actions.CodeCompleteObjCInstanceMessage(getCurScope(), ReceiverExpr,
                                                   KeyIdents,
-                                                /*AtArgumentEpression=*/false);
+                                                /*AtArgumentExpression=*/false);
         cutOffParsing();
         return ExprError();
       }
diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp
index 5b9749c80aa6d..f81ecc738c283 100644
--- a/clang/lib/Parse/ParsePragma.cpp
+++ b/clang/lib/Parse/ParsePragma.cpp
@@ -2491,7 +2491,7 @@ void PragmaDetectMismatchHandler::HandlePragma(Preprocessor &PP,
   std::string NameString;
   if (!PP.LexStringLiteral(Tok, NameString,
                            "pragma detect_mismatch",
-                           /*MacroExpansion=*/true))
+                           /*AllowMacroExpansion=*/true))
     return;
 
   // Read the comma followed by a second string literal.
@@ -2502,7 +2502,7 @@ void PragmaDetectMismatchHandler::HandlePragma(Preprocessor &PP,
   }
 
   if (!PP.LexStringLiteral(Tok, ValueString, "pragma detect_mismatch",
-                           /*MacroExpansion=*/true))
+                           /*AllowMacroExpansion=*/true))
     return;
 
   if (Tok.isNot(tok::r_paren)) {
@@ -2584,7 +2584,7 @@ void PragmaCommentHandler::HandlePragma(Preprocessor &PP,
   std::string ArgumentString;
   if (Tok.is(tok::comma) && !PP.LexStringLiteral(Tok, ArgumentString,
                                                  "pragma comment",
-                                                 /*MacroExpansion=*/true))
+                                                 /*AllowMacroExpansion=*/true))
     return;
 
   // FIXME: warn that 'exestr' is deprecated.
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index 6ae75eda24312..9bb5b6eac37e2 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -303,7 +303,7 @@ Decl *Parser::ParseSingleDeclarationAfterTemplate(
         return ParseFunctionDefinition(
             DeclaratorInfo, ParsedTemplateInfo(&FakedParamLists,
                                                /*isSpecialization=*/true,
-                                               /*LastParamListWasEmpty=*/true),
+                                               /*lastParameterListWasEmpty=*/true),
             &LateParsedAttrs);
       }
     }
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 9ca6e4ab0e21e..85d6da700eaad 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2044,7 +2044,7 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
 
     if (!CStyle) {
       Self.CheckCompatibleReinterpretCast(SrcType, DestType,
-                                          /*isDereference=*/false, OpRange);
+                                          /*IsDereference=*/false, OpRange);
     }
 
     // C++ 5.2.10p10: [...] a reference cast reinterpret_cast<T&>(x) has the
@@ -2881,7 +2881,7 @@ ExprResult Sema::BuildCStyleCastExpr(SourceLocation LPLoc,
   Op.OpRange = SourceRange(LPLoc, CastExpr->getEndLoc());
 
   if (getLangOpts().CPlusPlus) {
-    Op.CheckCXXCStyleCast(/*FunctionalStyle=*/ false,
+    Op.CheckCXXCStyleCast(/*FunctionalCast=*/ false,
                           isa<InitListExpr>(CastExpr));
   } else {
     Op.CheckCStyleCast();
@@ -2908,7 +2908,7 @@ ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo,
   Op.DestRange = CastTypeInfo->getTypeLoc().getSourceRange();
   Op.OpRange = SourceRange(Op.DestRange.getBegin(), CastExpr->getEndLoc());
 
-  Op.CheckCXXCStyleCast(/*FunctionalStyle=*/true, /*ListInit=*/false);
+  Op.CheckCXXCStyleCast(/*FunctionalCast=*/true, /*ListInit=*/false);
   if (Op.SrcExpr.isInvalid())
     return ExprError();
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 1c3d12ea0983a..f9f82cdeef432 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -11391,7 +11391,7 @@ CheckImplicitConversion(Sema &S, Expr *E, QualType T, SourceLocation CC,
         if (Overflowed) {
           S.DiagRuntimeBehavior(E->getExprLoc(), E,
                                 S.PDiag(diag::warn_impcast_fixed_point_range)
-                                    << Value.toString(/*radix=*/10) << T
+                                    << Value.toString(/*Radix=*/10) << T
                                     << E->getSourceRange()
                                     << clang::SourceRange(CC));
           return;
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 7770f345cd051..93a104e3ade5f 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -5136,7 +5136,7 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn,
     Decls.append(UME->decls_begin(), UME->decls_end());
     const bool FirstArgumentIsBase = !UME->isImplicitAccess() && UME->getBase();
     AddFunctionCandidates(Decls, ArgExprs, CandidateSet, TemplateArgs,
-                          /*SuppressUsedConversions=*/false,
+                          /*SuppressUserConversions=*/false,
                           /*PartialOverloading=*/true, FirstArgumentIsBase);
   } else {
     FunctionDecl *FD = nullptr;
@@ -5151,7 +5151,7 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn,
       else
         AddOverloadCandidate(FD, DeclAccessPair::make(FD, FD->getAccess()),
                              Args, CandidateSet,
-                             /*SuppressUsedConversions=*/false,
+                             /*SuppressUserConversions=*/false,
                              /*PartialOverloading=*/true);
 
     } else if (auto DC = NakedFn->getType()->getAsCXXRecordDecl()) {
@@ -5168,7 +5168,7 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn,
         ArgExprs.append(Args.begin(), Args.end());
         AddFunctionCandidates(R.asUnresolvedSet(), ArgExprs, CandidateSet,
                               /*ExplicitArgs=*/nullptr,
-                              /*SuppressUsedConversions=*/false,
+                              /*SuppressUserConversions=*/false,
                               /*PartialOverloading=*/true);
       }
     } else {
@@ -5216,14 +5216,14 @@ QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type,
     if (auto *FD = dyn_cast<FunctionDecl>(C)) {
       AddOverloadCandidate(FD, DeclAccessPair::make(FD, C->getAccess()), Args,
                            CandidateSet,
-                           /*SuppressUsedConversions=*/false,
+                           /*SuppressUserConversions=*/false,
                            /*PartialOverloading=*/true,
                            /*AllowExplicit*/ true);
     } else if (auto *FTD = dyn_cast<FunctionTemplateDecl>(C)) {
       AddTemplateOverloadCandidate(
           FTD, DeclAccessPair::make(FTD, C->getAccess()),
           /*ExplicitTemplateArgs=*/nullptr, Args, CandidateSet,
-          /*SuppressUsedConversions=*/false,
+          /*SuppressUserConversions=*/false,
           /*PartialOverloading=*/true);
     }
   }
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index ee7950de45e73..0709c926ed90e 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -716,7 +716,7 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II,
           getTypeName(*Corrected.getCorrectionAsIdentifierInfo(), IILoc, S,
                       tmpSS.isSet() ? &tmpSS : SS, false, false, nullptr,
                       /*IsCtorOrDtorName=*/false,
-                      /*NonTrivialTypeSourceInfo=*/true);
+                      /*WantNontrivialTypeSourceInfo=*/true);
     }
     return;
   }
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 802ca52371a58..725a7770d67d7 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -900,7 +900,7 @@ static void handleAllocSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   const Expr *SizeExpr = AL.getArgAsExpr(0);
   int SizeArgNoVal;
   // Parameter indices are 1-indexed, hence Index=1
-  if (!checkPositiveIntArgument(S, AL, SizeExpr, SizeArgNoVal, /*Index=*/1))
+  if (!checkPositiveIntArgument(S, AL, SizeExpr, SizeArgNoVal, /*Idx=*/1))
     return;
   if (!checkParamIsIntegerType(S, FD, AL, /*AttrArgNo=*/0))
     return;
@@ -911,7 +911,7 @@ static void handleAllocSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
     const Expr *NumberExpr = AL.getArgAsExpr(1);
     int Val;
     // Parameter indices are 1-based, hence Index=2
-    if (!checkPositiveIntArgument(S, AL, NumberExpr, Val, /*Index=*/2))
+    if (!checkPositiveIntArgument(S, AL, NumberExpr, Val, /*Idx=*/2))
       return;
     if (!checkParamIsIntegerType(S, FD, AL, /*AttrArgNo=*/1))
       return;
@@ -1673,7 +1673,7 @@ void Sema::AddAllocAlignAttr(SourceRange AttrRange, Decl *D, Expr *ParamExpr,
   ParamIdx Idx;
   const auto *FuncDecl = cast<FunctionDecl>(D);
   if (!checkFunctionOrMethodParameterIndex(*this, FuncDecl, TmpAttr,
-                                           /*AttrArgNo=*/1, ParamExpr, Idx))
+                                           /*AttrArgNum=*/1, ParamExpr, Idx))
     return;
 
   QualType Ty = getFunctionOrMethodParamType(D, Idx.getASTIndex());
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index b23352bffbf17..cb6b4188039f2 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -11168,8 +11168,8 @@ Sema::findInheritingConstructor(SourceLocation Loc,
 
   CXXConstructorDecl *DerivedCtor = CXXConstructorDecl::Create(
       Context, Derived, UsingLoc, NameInfo, TInfo->getType(), TInfo,
-      BaseCtor->getExplicitSpecifier(), /*Inline=*/true,
-      /*ImplicitlyDeclared=*/true,
+      BaseCtor->getExplicitSpecifier(), /*isInline=*/true,
+      /*isImplicitlyDeclared=*/true,
       Constexpr ? BaseCtor->getConstexprKind() : CSK_unspecified,
       InheritedConstructor(Shadow, BaseCtor));
   if (Shadow->isInvalidDecl())
@@ -11190,7 +11190,7 @@ Sema::findInheritingConstructor(SourceLocation Loc,
         Context.getTrivialTypeSourceInfo(FPT->getParamType(I), UsingLoc);
     ParmVarDecl *PD = ParmVarDecl::Create(
         Context, DerivedCtor, UsingLoc, UsingLoc, /*IdentifierInfo=*/nullptr,
-        FPT->getParamType(I), TInfo, SC_None, /*DefaultArg=*/nullptr);
+        FPT->getParamType(I), TInfo, SC_None, /*DefArg=*/nullptr);
     PD->setScopeInfo(0, I);
     PD->setImplicit();
     // Ensure attributes are propagated onto parameters (this matters for
@@ -11751,7 +11751,7 @@ buildSingleCopyAssignRecursively(Sema &S, SourceLocation Loc, QualType T,
 
     // Create the reference to operator=.
     ExprResult OpEqualRef
-      = S.BuildMemberReferenceExpr(To.build(S, Loc), T, Loc, /*isArrow=*/false,
+      = S.BuildMemberReferenceExpr(To.build(S, Loc), T, Loc, /*IsArrow=*/false,
                                    SS, /*TemplateKWLoc=*/SourceLocation(),
                                    /*FirstQualifierInScope=*/nullptr,
                                    OpLookup,
diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp
index 21d9b8c32266d..e629837eb71d1 100644
--- a/clang/lib/Sema/SemaDeclObjC.cpp
+++ b/clang/lib/Sema/SemaDeclObjC.cpp
@@ -1587,7 +1587,7 @@ void Sema::actOnObjCTypeArgsOrProtocolQualifiers(
     // add the '*'.
     if (type->getAs<ObjCInterfaceType>()) {
       SourceLocation starLoc = getLocForEndOfToken(loc);
-      D.AddTypeInfo(DeclaratorChunk::getPointer(/*typeQuals=*/0, starLoc,
+      D.AddTypeInfo(DeclaratorChunk::getPointer(/*TypeQuals=*/0, starLoc,
                                                 SourceLocation(),
                                                 SourceLocation(),
                                                 SourceLocation(),
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 1e49a363ab330..d8869ffe945a2 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -1083,8 +1083,8 @@ static QualType handleFloatConversion(Sema &S, ExprResult &LHS,
       LHSType = S.Context.FloatTy;
 
     return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType,
-                                      /*convertFloat=*/!IsCompAssign,
-                                      /*convertInt=*/ true);
+                                      /*ConvertFloat=*/!IsCompAssign,
+                                      /*ConvertInt=*/ true);
   }
   assert(RHSFloat);
   return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType,
@@ -2151,7 +2151,7 @@ recoverFromMSUnqualifiedLookup(Sema &S, ASTContext &Context,
     return CXXDependentScopeMemberExpr::Create(
         Context, /*This=*/nullptr, ThisType, /*IsArrow=*/true,
         /*Op=*/SourceLocation(), NestedNameSpecifierLoc(), TemplateKWLoc,
-        /*FirstQualifierInScope=*/nullptr, NameInfo, TemplateArgs);
+        /*FirstQualifierFoundInScope=*/nullptr, NameInfo, TemplateArgs);
   }
 
   // Synthesize a fake NNS that points to the derived class.  This will
@@ -7308,10 +7308,10 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS,
   // GCC compatibility: soften pointer/integer mismatch.  Note that
   // null pointers have been filtered out by this point.
   if (checkPointerIntegerMismatch(*this, LHS, RHS.get(), QuestionLoc,
-      /*isIntFirstExpr=*/true))
+      /*IsIntFirstExpr=*/true))
     return RHSTy;
   if (checkPointerIntegerMismatch(*this, RHS, LHS.get(), QuestionLoc,
-      /*isIntFirstExpr=*/false))
+      /*IsIntFirstExpr=*/false))
     return LHSTy;
 
   // Emit a better diagnostic if one of the expressions is a null pointer
@@ -9105,7 +9105,7 @@ static void DiagnoseBadDivideOrRemainderValues(Sema& S, ExprResult &LHS,
 QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS,
                                            SourceLocation Loc,
                                            bool IsCompAssign, bool IsDiv) {
-  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);
+  checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
 
   if (LHS.get()->getType()->isVectorType() ||
       RHS.get()->getType()->isVectorType())
@@ -9129,7 +9129,7 @@ QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS,
 
 QualType Sema::CheckRemainderOperands(
   ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) {
-  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);
+  checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
 
   if (LHS.get()->getType()->isVectorType() ||
       RHS.get()->getType()->isVectorType()) {
@@ -9418,7 +9418,7 @@ static void diagnosePointerIncompatibility(Sema &S, SourceLocation Loc,
 QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS,
                                      SourceLocation Loc, BinaryOperatorKind Opc,
                                      QualType* CompLHSTy) {
-  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);
+  checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
 
   if (LHS.get()->getType()->isVectorType() ||
       RHS.get()->getType()->isVectorType()) {
@@ -9512,7 +9512,7 @@ QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS,
 QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS,
                                         SourceLocation Loc,
                                         QualType* CompLHSTy) {
-  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);
+  checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
 
   if (LHS.get()->getType()->isVectorType() ||
       RHS.get()->getType()->isVectorType()) {
@@ -9800,7 +9800,7 @@ static QualType checkVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS,
 QualType Sema::CheckShiftOperands(ExprResult &LHS, ExprResult &RHS,
                                   SourceLocation Loc, BinaryOperatorKind Opc,
                                   bool IsCompAssign) {
-  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);
+  checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
 
   // Vector shifts promote their scalar inputs to vector type.
   if (LHS.get()->getType()->isVectorType() ||
@@ -9969,7 +9969,7 @@ static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) {
   Selector IsEqualSel = S.NSAPIObj->getIsEqualSelector();
   ObjCMethodDecl *Method = S.LookupMethodInObjectType(IsEqualSel,
                                                       InterfaceType,
-                                                      /*instance=*/true);
+                                                      /*IsInstance=*/true);
   if (!Method) {
     if (Type->isObjCIdType()) {
       // For 'id', just check the global pool.
@@ -9978,7 +9978,7 @@ static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) {
     } else {
       // Check protocols.
       Method = S.LookupMethodInQualifiedType(IsEqualSel, Type,
-                                             /*instance=*/true);
+                                             /*IsInstance=*/true);
     }
   }
 
@@ -10478,7 +10478,7 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
       return QualType();
   }
 
-  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/true);
+  checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/true);
 
   // Handle vector comparisons separately.
   if (LHS.get()->getType()->isVectorType() ||
@@ -10988,7 +10988,7 @@ QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS,
 inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS,
                                            SourceLocation Loc,
                                            BinaryOperatorKind Opc) {
-  checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false);
+  checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false);
 
   bool IsCompAssign =
       Opc == BO_AndAssign || Opc == BO_OrAssign || Opc == BO_XorAssign;
@@ -15626,7 +15626,7 @@ static bool captureInLambda(LambdaScopeInfo *LSI,
 
   // Add the capture.
   if (BuildAndDiagnose)
-    LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable,
+    LSI->addCapture(Var, /*isBlock=*/false, ByRef, RefersToCapturedVariable,
                     Loc, EllipsisLoc, CaptureType, Invalid);
 
   return !Invalid;
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 3029861f2303a..df8638a013623 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1254,7 +1254,7 @@ ExprResult Sema::ActOnCXXThis(SourceLocation Loc) {
   QualType ThisTy = getCurrentThisType();
   if (ThisTy.isNull())
     return Diag(Loc, diag::err_invalid_this_use);
-  return BuildCXXThisExpr(Loc, ThisTy, /*isImplicit=*/false);
+  return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false);
 }
 
 Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type,
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index 2431f96fb3f75..c856e37e99e7a 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -1106,7 +1106,7 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
     SourceLocation Loc = R.getNameLoc();
     if (SS.getRange().isValid())
       Loc = SS.getRange().getBegin();
-    BaseExpr = BuildCXXThisExpr(Loc, BaseExprType, /*isImplicit=*/true);
+    BaseExpr = BuildCXXThisExpr(Loc, BaseExprType, /*IsImplicit=*/true);
   }
 
   // Check the use of this member.
@@ -1130,7 +1130,7 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
 
   if (VarDecl *Var = dyn_cast<VarDecl>(MemberDecl)) {
     return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Var,
-                           FoundDecl, /*MultipleCandidates=*/false,
+                           FoundDecl, /*HadMultipleCandidates=*/false,
                            MemberNameInfo, Var->getType().getNonReferenceType(),
                            VK_LValue, OK_Ordinary);
   }
@@ -1147,14 +1147,14 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
     }
 
     return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc,
-                           MemberFn, FoundDecl, /*MultipleCandidates=*/false,
+                           MemberFn, FoundDecl, /*HadMultipleCandidates=*/false,
                            MemberNameInfo, type, valueKind, OK_Ordinary);
   }
   assert(!isa<FunctionDecl>(MemberDecl) && "member function not C++ method?");
 
   if (EnumConstantDecl *Enum = dyn_cast<EnumConstantDecl>(MemberDecl)) {
     return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Enum,
-                           FoundDecl, /*MultipleCandidates=*/false,
+                           FoundDecl, /*HadMultipleCandidates=*/false,
                            MemberNameInfo, Enum->getType(), VK_RValue,
                            OK_Ordinary);
   }
@@ -1163,7 +1163,7 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType,
             *this, VarTempl, TemplateArgs, MemberNameInfo, TemplateKWLoc))
       return BuildMemberExpr(
           BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Var, FoundDecl,
-          /*MultipleCandidates=*/false, MemberNameInfo,
+          /*HadMultipleCandidates=*/false, MemberNameInfo,
           Var->getType().getNonReferenceType(), VK_LValue, OK_Ordinary);
     return ExprError();
   }
@@ -1822,7 +1822,7 @@ Sema::BuildFieldReferenceExpr(Expr *BaseExpr, bool IsArrow,
 
   return BuildMemberExpr(Base.get(), IsArrow, OpLoc, &SS,
                          /*TemplateKWLoc=*/SourceLocation(), Field, FoundDecl,
-                         /*MultipleCandidates=*/false, MemberNameInfo,
+                         /*HadMultipleCandidates=*/false, MemberNameInfo,
                          MemberType, VK, OK);
 }
 
@@ -1851,7 +1851,7 @@ Sema::BuildImplicitMemberExpr(const CXXScopeSpec &SS,
     SourceLocation Loc = R.getNameLoc();
     if (SS.getRange().isValid())
       Loc = SS.getRange().getBegin();
-    baseExpr = BuildCXXThisExpr(loc, ThisTy, /*isImplicit=*/true);
+    baseExpr = BuildCXXThisExpr(loc, ThisTy, /*IsImplicit=*/true);
   }
 
   return BuildMemberReferenceExpr(baseExpr, ThisTy,
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index d2a4a0979d15a..b3b34699eb64f 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -3931,7 +3931,7 @@ static void TryConstructorInitialization(Sema &S,
       Result = ResolveConstructorOverload(S, Kind.getLocation(), Args,
                                           CandidateSet, DestType, Ctors, Best,
                                           CopyInitialization, AllowExplicit,
-                                          /*OnlyListConstructor=*/true,
+                                          /*OnlyListConstructors=*/true,
                                           IsListInit);
   }
 
@@ -4118,7 +4118,7 @@ static void TryReferenceListInitialization(Sema &S,
   if (Sequence) {
     if (DestType->isRValueReferenceType() ||
         (T1Quals.hasConst() && !T1Quals.hasVolatile()))
-      Sequence.AddReferenceBindingStep(cv1T1, /*bindingTemporary=*/true);
+      Sequence.AddReferenceBindingStep(cv1T1, /*BindingTemporary=*/true);
     else
       Sequence.SetFailed(
           InitializationSequence::FK_NonConstLValueReferenceBindingToTemporary);
@@ -4833,7 +4833,7 @@ static void TryReferenceInitializationCore(Sema &S,
     return;
   }
 
-  Sequence.AddReferenceBindingStep(cv1T1IgnoreAS, /*bindingTemporary=*/true);
+  Sequence.AddReferenceBindingStep(cv1T1IgnoreAS, /*BindingTemporary=*/true);
 
   if (T1Quals.hasAddressSpace()) {
     if (!Qualifiers::isAddressSpaceSupersetOf(T1Quals.getAddressSpace(),
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 268e15c20ef3e..986524e6d56b4 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -1328,7 +1328,7 @@ static void addFunctionPointerConversion(Sema &S,
         S.Context.getTranslationUnitDecl(), From->getBeginLoc(),
         From->getLocation(), From->getIdentifier(), From->getType(),
         From->getTypeSourceInfo(), From->getStorageClass(),
-        /*DefaultArg=*/nullptr));
+        /*DefArg=*/nullptr));
     CallOpConvTL.setParam(I, From);
     CallOpConvNameTL.setParam(I, From);
   }
@@ -1376,7 +1376,7 @@ static void addFunctionPointerConversion(Sema &S,
   CXXMethodDecl *Invoke = CXXMethodDecl::Create(
       S.Context, Class, Loc, DeclarationNameInfo(InvokerName, Loc),
       InvokerFunctionTy, CallOperator->getTypeSourceInfo(), SC_Static,
-      /*IsInline=*/true, CSK_unspecified, CallOperator->getBody()->getEndLoc());
+      /*isInline=*/true, CSK_unspecified, CallOperator->getBody()->getEndLoc());
   for (unsigned I = 0, N = CallOperator->getNumParams(); I != N; ++I)
     InvokerParams[I]->setOwningFunction(Invoke);
   Invoke->setParams(InvokerParams);
@@ -1860,7 +1860,7 @@ ExprResult Sema::BuildBlockForLambdaConversion(SourceLocation CurrentLocation,
         Context, Block, From->getBeginLoc(), From->getLocation(),
         From->getIdentifier(), From->getType(), From->getTypeSourceInfo(),
         From->getStorageClass(),
-        /*DefaultArg=*/nullptr));
+        /*DefArg=*/nullptr));
   }
   Block->setParams(BlockParams);
 
@@ -1875,8 +1875,8 @@ ExprResult Sema::BuildBlockForLambdaConversion(SourceLocation CurrentLocation,
                                     ConvLocation, nullptr,
                                     Src->getType(), CapVarTSI,
                                     SC_None);
-  BlockDecl::Capture Capture(/*Variable=*/CapVar, /*ByRef=*/false,
-                             /*Nested=*/false, /*Copy=*/Init.get());
+  BlockDecl::Capture Capture(/*variable=*/CapVar, /*byRef=*/false,
+                             /*nested=*/false, /*copy=*/Init.get());
   Block->setCaptures(Context, Capture, /*CapturesCXXThis=*/false);
 
   // Add a fake function body to the block. IR generation is responsible
diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp
index 68c2286cf492e..10de0ca91221c 100644
--- a/clang/lib/Sema/SemaModule.cpp
+++ b/clang/lib/Sema/SemaModule.cpp
@@ -206,7 +206,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc,
         PP.getIdentifierInfo(ModuleName), Path[0].second);
     Mod = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc},
                                        Module::AllVisible,
-                                       /*IsIncludeDirective=*/false);
+                                       /*IsInclusionDirective=*/false);
     if (!Mod) {
       Diag(ModuleLoc, diag::err_module_not_defined) << ModuleName;
       // Create an empty module interface unit for error recovery.
@@ -323,7 +323,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
 
   Module *Mod =
       getModuleLoader().loadModule(ImportLoc, Path, Module::AllVisible,
-                                   /*IsIncludeDirective=*/false);
+                                   /*IsInclusionDirective=*/false);
   if (!Mod)
     return true;
 
diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp
index 2521441f8bc71..e5c014501431b 100644
--- a/clang/lib/Sema/SemaObjCProperty.cpp
+++ b/clang/lib/Sema/SemaObjCProperty.cpp
@@ -1288,7 +1288,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
 
       Ivar = ObjCIvarDecl::Create(Context, ClassImpDecl,
                                   PropertyIvarLoc,PropertyIvarLoc, PropertyIvar,
-                                  PropertyIvarType, /*Dinfo=*/nullptr,
+                                  PropertyIvarType, /*TInfo=*/nullptr,
                                   ObjCIvarDecl::Private,
                                   (Expr *)nullptr, true);
       if (RequireNonAbstractType(PropertyIvarLoc,
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b669929e655f3..222d042b6da59 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -6000,7 +6000,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
         CollapseLoopCountExpr->EvaluateAsInt(Result, SemaRef.getASTContext())) {
       NestedLoopCount = Result.Val.getInt().getLimitedValue();
     } else {
-      Built.clear(/*size=*/1);
+      Built.clear(/*Size=*/1);
       return 1;
     }
   }
@@ -6022,7 +6022,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
       }
       OrderedLoopCount = Result.getLimitedValue();
     } else {
-      Built.clear(/*size=*/1);
+      Built.clear(/*Size=*/1);
       return 1;
     }
   }
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 11595fac2cb6b..77e6767c2b814 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1950,7 +1950,7 @@ IsTransparentUnionStandardConversion(Sema &S, Expr* From,
   // It's compatible if the expression matches any of the fields.
   for (const auto *it : UD->fields()) {
     if (IsStandardConversion(S, From, it->getType(), InOverloadResolution, SCS,
-                             CStyle, /*ObjCWritebackConversion=*/false)) {
+                             CStyle, /*AllowObjCWritebackConversion=*/false)) {
       ToType = it->getType();
       return true;
     }
@@ -5423,7 +5423,7 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From,
           : TryCopyInitialization(S, From, T,
                                   /*SuppressUserConversions=*/false,
                                   /*InOverloadResolution=*/false,
-                                  /*AllowObjcWritebackConversion=*/false,
+                                  /*AllowObjCWritebackConversion=*/false,
                                   /*AllowExplicit=*/false);
   StandardConversionSequence *SCS = nullptr;
   switch (ICS.getKind()) {
@@ -7319,7 +7319,7 @@ void Sema::AddMemberOperatorCandidates(OverloadedOperatorKind Op,
          ++Oper)
       AddMethodCandidate(Oper.getPair(), Args[0]->getType(),
                          Args[0]->Classify(Context), Args.slice(1),
-                         CandidateSet, /*SuppressUserConversions=*/false);
+                         CandidateSet, /*SuppressUserConversion=*/false);
   }
 }
 
@@ -8420,7 +8420,7 @@ class BuiltinOperatorOverloadBuilder {
         isEqualOp ? *Ptr : S.Context.getPointerDiffType(),
       };
       S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                            /*IsAssigmentOperator=*/ isEqualOp);
+                            /*IsAssignmentOperator=*/ isEqualOp);
 
       bool NeedVolatile = !(*Ptr).isVolatileQualified() &&
                           VisibleTypeConversionsQuals.hasVolatile();
@@ -8429,7 +8429,7 @@ class BuiltinOperatorOverloadBuilder {
         ParamTypes[0] =
           S.Context.getLValueReferenceType(S.Context.getVolatileType(*Ptr));
         S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                              /*IsAssigmentOperator=*/isEqualOp);
+                              /*IsAssignmentOperator=*/isEqualOp);
       }
 
       if (!(*Ptr).isRestrictQualified() &&
@@ -8438,7 +8438,7 @@ class BuiltinOperatorOverloadBuilder {
         ParamTypes[0]
           = S.Context.getLValueReferenceType(S.Context.getRestrictType(*Ptr));
         S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                              /*IsAssigmentOperator=*/isEqualOp);
+                              /*IsAssignmentOperator=*/isEqualOp);
 
         if (NeedVolatile) {
           // volatile restrict version
@@ -8448,7 +8448,7 @@ class BuiltinOperatorOverloadBuilder {
                                               (Qualifiers::Volatile |
                                                Qualifiers::Restrict)));
           S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                                /*IsAssigmentOperator=*/isEqualOp);
+                                /*IsAssignmentOperator=*/isEqualOp);
         }
       }
     }
@@ -8469,7 +8469,7 @@ class BuiltinOperatorOverloadBuilder {
 
         // non-volatile version
         S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                              /*IsAssigmentOperator=*/true);
+                              /*IsAssignmentOperator=*/true);
 
         bool NeedVolatile = !(*Ptr).isVolatileQualified() &&
                            VisibleTypeConversionsQuals.hasVolatile();
@@ -8478,7 +8478,7 @@ class BuiltinOperatorOverloadBuilder {
           ParamTypes[0] =
             S.Context.getLValueReferenceType(S.Context.getVolatileType(*Ptr));
           S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                                /*IsAssigmentOperator=*/true);
+                                /*IsAssignmentOperator=*/true);
         }
 
         if (!(*Ptr).isRestrictQualified() &&
@@ -8487,7 +8487,7 @@ class BuiltinOperatorOverloadBuilder {
           ParamTypes[0]
             = S.Context.getLValueReferenceType(S.Context.getRestrictType(*Ptr));
           S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                                /*IsAssigmentOperator=*/true);
+                                /*IsAssignmentOperator=*/true);
 
           if (NeedVolatile) {
             // volatile restrict version
@@ -8497,7 +8497,7 @@ class BuiltinOperatorOverloadBuilder {
                                                 (Qualifiers::Volatile |
                                                  Qualifiers::Restrict)));
             S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                                  /*IsAssigmentOperator=*/true);
+                                  /*IsAssignmentOperator=*/true);
           }
         }
       }
@@ -8530,14 +8530,14 @@ class BuiltinOperatorOverloadBuilder {
         // Add this built-in operator as a candidate (VQ is empty).
         ParamTypes[0] = S.Context.getLValueReferenceType(LeftBaseTy);
         S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                              /*IsAssigmentOperator=*/isEqualOp);
+                              /*IsAssignmentOperator=*/isEqualOp);
 
         // Add this built-in operator as a candidate (VQ is 'volatile').
         if (VisibleTypeConversionsQuals.hasVolatile()) {
           ParamTypes[0] = S.Context.getVolatileType(LeftBaseTy);
           ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]);
           S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                                /*IsAssigmentOperator=*/isEqualOp);
+                                /*IsAssignmentOperator=*/isEqualOp);
         }
       }
     }
@@ -8556,14 +8556,14 @@ class BuiltinOperatorOverloadBuilder {
         // Add this built-in operator as a candidate (VQ is empty).
         ParamTypes[0] = S.Context.getLValueReferenceType(*Vec1);
         S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                              /*IsAssigmentOperator=*/isEqualOp);
+                              /*IsAssignmentOperator=*/isEqualOp);
 
         // Add this built-in operator as a candidate (VQ is 'volatile').
         if (VisibleTypeConversionsQuals.hasVolatile()) {
           ParamTypes[0] = S.Context.getVolatileType(*Vec1);
           ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]);
           S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet,
-                                /*IsAssigmentOperator=*/isEqualOp);
+                                /*IsAssignmentOperator=*/isEqualOp);
         }
       }
     }
@@ -9012,7 +9012,7 @@ Sema::AddArgumentDependentLookupCandidates(DeclarationName Name,
         continue;
 
       AddOverloadCandidate(FD, FoundDecl, Args, CandidateSet,
-                           /*SupressUserConversions=*/false, PartialOverloading,
+                           /*SuppressUserConversions=*/false, PartialOverloading,
                            /*AllowExplicit*/ true,
                            /*AllowExplicitConversions*/ false,
                            ADLCallKind::UsesADL);
@@ -11763,7 +11763,7 @@ static void AddOverloadedCallCandidate(Sema &S,
       return;
 
     S.AddOverloadCandidate(Func, FoundDecl, Args, CandidateSet,
-                           /*SuppressUsedConversions=*/false,
+                           /*SuppressUserConversions=*/false,
                            PartialOverloading);
     return;
   }
@@ -11772,7 +11772,7 @@ static void AddOverloadedCallCandidate(Sema &S,
       = dyn_cast<FunctionTemplateDecl>(Callee)) {
     S.AddTemplateOverloadCandidate(FuncTemplate, FoundDecl,
                                    ExplicitTemplateArgs, Args, CandidateSet,
-                                   /*SuppressUsedConversions=*/false,
+                                   /*SuppressUserConversions=*/false,
                                    PartialOverloading);
     return;
   }
@@ -13066,7 +13066,7 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE,
         AddMethodTemplateCandidate(
             cast<FunctionTemplateDecl>(Func), I.getPair(), ActingDC,
             TemplateArgs, ObjectType, ObjectClassification, Args, CandidateSet,
-            /*SuppressUsedConversions=*/false);
+            /*SuppressUserConversions=*/false);
       }
     }
 
@@ -13262,7 +13262,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj,
        Oper != OperEnd; ++Oper) {
     AddMethodCandidate(Oper.getPair(), Object.get()->getType(),
                        Object.get()->Classify(Context), Args, CandidateSet,
-                       /*SuppressUserConversions=*/false);
+                       /*SuppressUserConversion=*/false);
   }
 
   // C++ [over.call.object]p2:
@@ -13537,7 +13537,7 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc,
   for (LookupResult::iterator Oper = R.begin(), OperEnd = R.end();
        Oper != OperEnd; ++Oper) {
     AddMethodCandidate(Oper.getPair(), Base->getType(), Base->Classify(Context),
-                       None, CandidateSet, /*SuppressUserConversions=*/false);
+                       None, CandidateSet, /*SuppressUserConversion=*/false);
   }
 
   bool HadMultipleCandidates = (CandidateSet.size() > 1);
@@ -13919,7 +13919,7 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found,
         if (MemExpr->getQualifier())
           Loc = MemExpr->getQualifierLoc().getBeginLoc();
         Base =
-            BuildCXXThisExpr(Loc, MemExpr->getBaseType(), /*isImplicit=*/true);
+            BuildCXXThisExpr(Loc, MemExpr->getBaseType(), /*IsImplicit=*/true);
       }
     } else
       Base = MemExpr->getBase();
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 0e5881e327a8b..480155df89901 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -2447,7 +2447,7 @@ StmtResult Sema::BuildCXXForRangeStmt(SourceLocation ForLoc,
 
         ExprResult SizeOfVLAExprR = ActOnUnaryExprOrTypeTraitExpr(
             EndVar->getLocation(), UETT_SizeOf,
-            /*isType=*/true,
+            /*IsType=*/true,
             CreateParsedType(VAT->desugar(), Context.getTrivialTypeSourceInfo(
                                                  VAT->desugar(), RangeLoc))
                 .getAsOpaquePtr(),
@@ -2457,7 +2457,7 @@ StmtResult Sema::BuildCXXForRangeStmt(SourceLocation ForLoc,
 
         ExprResult SizeOfEachElementExprR = ActOnUnaryExprOrTypeTraitExpr(
             EndVar->getLocation(), UETT_SizeOf,
-            /*isType=*/true,
+            /*IsType=*/true,
             CreateParsedType(VAT->desugar(),
                              Context.getTrivialTypeSourceInfo(
                                  VAT->getElementType(), RangeLoc))
diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp
index ec8958c3c5f90..b123a739a7ab1 100644
--- a/clang/lib/Sema/SemaStmtAsm.cpp
+++ b/clang/lib/Sema/SemaStmtAsm.cpp
@@ -849,7 +849,7 @@ Sema::LookupInlineAsmVarDeclField(Expr *E, StringRef Member,
     return CXXDependentScopeMemberExpr::Create(
         Context, E, T, /*IsArrow=*/false, AsmLoc, NestedNameSpecifierLoc(),
         SourceLocation(),
-        /*FirstQualifierInScope=*/nullptr, NameInfo, /*TemplateArgs=*/nullptr);
+        /*FirstQualifierFoundInScope=*/nullptr, NameInfo, /*TemplateArgs=*/nullptr);
   }
 
   const RecordType *RT = T->getAs<RecordType>();
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index fba8cd4eee063..3212281cc34d2 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -8428,8 +8428,8 @@ bool Sema::CheckFunctionTemplateSpecialization(
       // here that have a different target.
       if (LangOpts.CUDA &&
           IdentifyCUDATarget(Specialization,
-                             /* IgnoreImplicitHDAttributes = */ true) !=
-              IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) {
+                             /* IgnoreImplicitHDAttr = */ true) !=
+              IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttr = */ true)) {
         FailedCandidates.addCandidate().set(
             I.getPair(), FunTmpl->getTemplatedDecl(),
             MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info));
@@ -9587,7 +9587,7 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S,
     // have a different target.
     if (LangOpts.CUDA &&
         IdentifyCUDATarget(Specialization,
-                           /* IgnoreImplicitHDAttributes = */ true) !=
+                           /* IgnoreImplicitHDAttr = */ true) !=
             IdentifyCUDATarget(D.getDeclSpec().getAttributes())) {
       FailedCandidates.addCandidate().set(
           P.getPair(), FunTmpl->getTemplatedDecl(),
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 263bc3104efe8..b55a232d26c22 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -2872,7 +2872,7 @@ Sema::DeduceTemplateArguments(ClassTemplatePartialSpecializationDecl *Partial,
     return Sema::TDK_SubstitutionFailure;
 
   return ::FinishTemplateArgumentDeduction(
-      *this, Partial, /*PartialOrdering=*/false, TemplateArgs, Deduced, Info);
+      *this, Partial, /*IsPartialOrdering=*/false, TemplateArgs, Deduced, Info);
 }
 
 /// Perform template argument deduction to determine whether
@@ -2913,7 +2913,7 @@ Sema::DeduceTemplateArguments(VarTemplatePartialSpecializationDecl *Partial,
     return Sema::TDK_SubstitutionFailure;
 
   return ::FinishTemplateArgumentDeduction(
-      *this, Partial, /*PartialOrdering=*/false, TemplateArgs, Deduced, Info);
+      *this, Partial, /*IsPartialOrdering=*/false, TemplateArgs, Deduced, Info);
 }
 
 /// Determine whether the given type T is a simple-template-id type.
@@ -5067,7 +5067,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2,
                                    Info);
   auto *TST1 = T1->castAs<TemplateSpecializationType>();
   if (FinishTemplateArgumentDeduction(
-          S, P2, /*PartialOrdering=*/true,
+          S, P2, /*IsPartialOrdering=*/true,
           TemplateArgumentList(TemplateArgumentList::OnStack,
                                TST1->template_arguments()),
           Deduced, Info))
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 514cbd90d3b85..12bad62d1f1dd 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -751,7 +751,7 @@ static void maybeSynthesizeBlockSignature(TypeProcessingState &state,
       /*IsAmbiguous=*/false,
       /*LParenLoc=*/NoLoc,
       /*ArgInfo=*/nullptr,
-      /*NumArgs=*/0,
+      /*NumParams=*/0,
       /*EllipsisLoc=*/NoLoc,
       /*RParenLoc=*/NoLoc,
       /*RefQualifierIsLvalueRef=*/true,
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index c3f3a370efc21..7f2c7f09e8a3e 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -8252,7 +8252,7 @@ void ASTReader::UpdateSema() {
   // Update the state of pragmas. Use the same API as if we had encountered the
   // pragma in the source.
   if(OptimizeOffPragmaLocation.isValid())
-    SemaObj->ActOnPragmaOptimize(/* IsOn = */ false, OptimizeOffPragmaLocation);
+    SemaObj->ActOnPragmaOptimize(/* On = */ false, OptimizeOffPragmaLocation);
   if (PragmaMSStructState != -1)
     SemaObj->ActOnPragmaMSStruct((PragmaMSStructKind)PragmaMSStructState);
   if (PointersToMembersPragmaLocation.isValid()) {
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 51a1e81525187..afaaa543bb27f 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2861,7 +2861,7 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
 
     case STMT_CXX_TRY:
       S = CXXTryStmt::Create(Context, Empty,
-             /*NumHandlers=*/Record[ASTStmtReader::NumStmtFields]);
+             /*numHandlers=*/Record[ASTStmtReader::NumStmtFields]);
       break;
 
     case STMT_CXX_FOR_RANGE:
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a22e97aaa5cd3..10946f9b0d985 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1447,7 +1447,7 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
   Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record);
 
   // Write out the diagnostic/pragma mappings.
-  WritePragmaDiagnosticMappings(Diags, /* IsModule = */ WritingModule);
+  WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule);
 
   // Leave the options block.
   Stream.ExitBlock();
diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp
index f90eab322785a..2db8f830c46de 100644
--- a/clang/lib/Serialization/GlobalModuleIndex.cpp
+++ b/clang/lib/Serialization/GlobalModuleIndex.cpp
@@ -658,8 +658,8 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
 
         // Find the imported module file.
         const FileEntry *DependsOnFile
-          = FileMgr.getFile(ImportedFile, /*openFile=*/false,
-                            /*cacheFailure=*/false);
+          = FileMgr.getFile(ImportedFile, /*OpenFile=*/false,
+                            /*CacheFailure=*/false);
 
         if (!DependsOnFile)
           return llvm::createStringError(std::errc::bad_file_descriptor,
diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp
index 3e45b30e250bd..6ae0c4f575519 100644
--- a/clang/lib/Serialization/ModuleManager.cpp
+++ b/clang/lib/Serialization/ModuleManager.cpp
@@ -42,8 +42,8 @@ using namespace clang;
 using namespace serialization;
 
 ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const {
-  const FileEntry *Entry = FileMgr.getFile(Name, /*openFile=*/false,
-                                           /*cacheFailure=*/false);
+  const FileEntry *Entry = FileMgr.getFile(Name, /*OpenFile=*/false,
+                                           /*CacheFailure=*/false);
   if (Entry)
     return lookup(Entry);
 
@@ -68,8 +68,8 @@ ModuleFile *ModuleManager::lookup(const FileEntry *File) const {
 
 std::unique_ptr<llvm::MemoryBuffer>
 ModuleManager::lookupBuffer(StringRef Name) {
-  const FileEntry *Entry = FileMgr.getFile(Name, /*openFile=*/false,
-                                           /*cacheFailure=*/false);
+  const FileEntry *Entry = FileMgr.getFile(Name, /*OpenFile=*/false,
+                                           /*CacheFailure=*/false);
   return std::move(InMemoryBuffers[Entry]);
 }
 
@@ -184,7 +184,7 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
     } else {
       // Get a buffer of the file and close the file descriptor when done.
       Buf = FileMgr.getBufferForFile(NewModule->File,
-                                     /*IsVolatile=*/false,
+                                     /*isVolatile=*/false,
                                      /*ShouldClose=*/true);
     }
 
@@ -447,7 +447,7 @@ bool ModuleManager::lookupModuleFile(StringRef FileName,
 
   // Open the file immediately to ensure there is no race between stat'ing and
   // opening the file.
-  File = FileMgr.getFile(FileName, /*openFile=*/true, /*cacheFailure=*/false);
+  File = FileMgr.getFile(FileName, /*OpenFile=*/true, /*CacheFailure=*/false);
   if (!File)
     return false;
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
index 916a20e559f86..3cfe4dc82a100 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp
@@ -144,7 +144,7 @@ static void recordFixedType(const MemRegion *Region, const CXXMethodDecl *MD,
   QualType Ty = Ctx.getPointerType(Ctx.getRecordType(MD->getParent()));
 
   ProgramStateRef State = C.getState();
-  State = setDynamicTypeInfo(State, Region, Ty, /*CanBeSubclass=*/false);
+  State = setDynamicTypeInfo(State, Region, Ty, /*CanBeSubClassed=*/false);
   C.addTransition(State);
 }
 
@@ -307,7 +307,7 @@ void DynamicTypePropagation::checkPostStmt(const CXXNewExpr *NewE,
     return;
 
   C.addTransition(setDynamicTypeInfo(C.getState(), MR, NewE->getType(),
-                                     /*CanBeSubclass=*/false));
+                                     /*CanBeSubClassed=*/false));
 }
 
 const ObjCObjectType *
@@ -887,7 +887,7 @@ void DynamicTypePropagation::checkPostObjCMessage(const ObjCMethodCall &M,
     // MostSpecializedTypeArgsMap. We should only store anything in the later if
     // the stored data differs from the one stored in the former.
     State = setDynamicTypeInfo(State, RetRegion, ResultType,
-                               /*CanBeSubclass=*/true);
+                               /*CanBeSubClassed=*/true);
     Pred = C.addTransition(State);
   }
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp
index 0637c2b29645b..d471c23b83bfc 100644
--- a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp
@@ -196,7 +196,7 @@ static void emitDiagnostics(const BoundNodes &Nodes,
     ADC->getDecl(),
     Checker,
     /*Name=*/"GCD performance anti-pattern",
-    /*Category=*/"Performance",
+    /*BugCategory=*/"Performance",
     OS.str(),
     PathDiagnosticLocation::createBegin(SW, BR.getSourceManager(), ADC),
     SW->getSourceRange());
diff --git a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp
index d575b2fd6ecb3..cc2cfb7742270 100644
--- a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp
@@ -115,7 +115,7 @@ bool FindIdenticalExprVisitor::VisitIfStmt(const IfStmt *I) {
   if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Stmt1)) {
     if (!CS->body_empty()) {
       const IfStmt *InnerIf = dyn_cast<IfStmt>(*CS->body_begin());
-      if (InnerIf && isIdenticalStmt(AC->getASTContext(), I->getCond(), InnerIf->getCond(), /*ignoreSideEffects=*/ false)) {
+      if (InnerIf && isIdenticalStmt(AC->getASTContext(), I->getCond(), InnerIf->getCond(), /*IgnoreSideEffects=*/ false)) {
         PathDiagnosticLocation ELoc(InnerIf->getCond(), BR.getSourceManager(), AC);
         BR.EmitBasicReport(AC->getDecl(), Checker, "Identical conditions",
           categories::LogicError,
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 03e779f3c52d8..8d9ab1f9e4834 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -1208,7 +1208,7 @@ void MallocChecker::checkPostObjCMessage(const ObjCMethodCall &Call,
   ProgramStateRef State = FreeMemAux(C, Call.getArgExpr(0),
                                      Call.getOriginExpr(), C.getState(),
                                      /*Hold=*/true, ReleasedAllocatedMemory,
-                                     /*RetNullOnFailure=*/true);
+                                     /*ReturnsNullOnFailure=*/true);
 
   C.addTransition(State);
 }
diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
index b7bf9f3db3ff6..af21c84b995b4 100644
--- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp
@@ -478,7 +478,7 @@ void NullabilityChecker::checkEvent(ImplicitNullDerefEvent Event) const {
     return;
 
   const MemRegion *Region =
-      getTrackRegion(Event.Location, /*CheckSuperregion=*/true);
+      getTrackRegion(Event.Location, /*CheckSuperRegion=*/true);
   if (!Region)
     return;
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp
index 27dadd09d7038..5b9895c338d81 100644
--- a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp
@@ -49,7 +49,7 @@ static void emitDiagnostics(const BoundNodes &Nodes,
     ADC->getDecl(),
     Checker,
     /*Name=*/"OSObject C-Style Cast",
-    /*Category=*/"Security",
+    /*BugCategory=*/"Security",
     OS.str(),
     PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), ADC),
     CE->getSourceRange());
diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp
index 40f82214e9495..d2371fe60d21d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp
@@ -136,7 +136,7 @@ static void emitDiagnostics(BoundNodes &Match, const Decl *D, BugReporter &BR,
       ADC->getDecl(), Checker,
       /*Name=*/(llvm::Twine(ActionMsg)
                 + " autoreleasing out parameter inside autorelease pool").str(),
-      /*Category=*/"Memory",
+      /*BugCategory=*/"Memory",
       (llvm::Twine(ActionMsg) + " autoreleasing out parameter " +
        (IsCapture ? "'" + PVD->getName() + "'" + " " : "") + "inside " +
        "autorelease pool that may exit before " + Name + " returns; consider "
diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
index 31d2d7c125e26..4a3a8dae23a7f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp
@@ -951,7 +951,7 @@ bool RetainCountChecker::evalCall(const CallEvent &Call,
       // And on the original branch assume that both input and
       // output are non-zero.
       if (auto L = RetVal.getAs<DefinedOrUnknownSVal>())
-        state = state->assume(*L, /*Assumption=*/true);
+        state = state->assume(*L, /*assumption=*/true);
 
     }
   }
diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp
index 927e9ae443609..796fd882ffd5e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp
@@ -65,7 +65,7 @@ StringRef RefCountBug::getDescription() const {
 
 RefCountBug::RefCountBug(const CheckerBase *Checker, RefCountBugType BT)
     : BugType(Checker, bugTypeToName(BT), categories::MemoryRefCount,
-              /*SupressOnSink=*/BT == LeakWithinFunction || BT == LeakAtReturn),
+              /*SuppressOnSink=*/BT == LeakWithinFunction || BT == LeakAtReturn),
       BT(BT), Checker(Checker) {}
 
 static bool isNumericLiteralExpression(const Expr *E) {
diff --git a/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp
index e744ff9d7c9e5..5e305aa709b64 100644
--- a/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp
@@ -115,7 +115,7 @@ static void emitDiagnostics(BoundNodes &Match,
 
   BR.EmitBasicReport(ADC->getDecl(), Checker,
                      /*Name=*/"Memory leak inside autorelease pool",
-                     /*Category=*/"Memory",
+                     /*BugCategory=*/"Memory",
                      /*Name=*/
                      (Twine("Temporary objects allocated in the") +
                       " autorelease pool " +
diff --git a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
index 417b07d14be51..62a4c2ab0209c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
@@ -87,7 +87,7 @@ class TrustNonnullChecker : public Checker<check::PostCall,
 
     if (isNonNullPtr(Call, C))
       if (auto L = Call.getReturnValue().getAs<Loc>())
-        State = State->assume(*L, /*Assumption=*/true);
+        State = State->assume(*L, /*assumption=*/true);
 
     C.addTransition(State);
   }
@@ -106,7 +106,7 @@ class TrustNonnullChecker : public Checker<check::PostCall,
         (Msg.getSelector() == SetObjectForKeyedSubscriptSel ||
          Msg.getSelector() == SetObjectForKeySel)) {
       if (auto L = Msg.getArgSVal(1).getAs<Loc>())
-        State = State->assume(*L, /*Assumption=*/true);
+        State = State->assume(*L, /*assumption=*/true);
     }
 
     // Record an implication: index is non-null if the output is non-null.
diff --git a/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp b/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
index 95f2b703cdd65..1b1ffff5ade82 100644
--- a/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp
@@ -23,7 +23,7 @@ AnalysisManager::AnalysisManager(ASTContext &ASTCtx, DiagnosticsEngine &diags,
     : AnaCtxMgr(
           ASTCtx, Options.UnoptimizedCFG,
           Options.ShouldIncludeImplicitDtorsInCFG,
-          /*AddInitializers=*/true,
+          /*addInitializers=*/true,
           Options.ShouldIncludeTemporaryDtorsInCFG,
           Options.ShouldIncludeLifetimeInCFG,
           // Adding LoopExit elements to the CFG is a requirement for loop
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index 6627633f39332..e5a0794f10e2c 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -1259,7 +1259,7 @@ generateEmptyDiagnosticForReport(BugReport *R, SourceManager &SM) {
   return llvm::make_unique<PathDiagnostic>(
       R->getBugType().getCheckName(), R->getDeclWithIssue(),
       R->getBugType().getName(), R->getDescription(),
-      R->getShortDescription(/*Fallback=*/false), BT.getCategory(),
+      R->getShortDescription(/*UseFallback=*/false), BT.getCategory(),
       R->getUniqueingLocation(), R->getUniqueingDecl(),
       findExecutedLines(SM, R->getErrorNode()));
 }
diff --git a/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp b/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
index 75ae2606910ac..79424452240d7 100644
--- a/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
+++ b/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp
@@ -36,7 +36,7 @@ DynamicTypeInfo getDynamicTypeInfo(ProgramStateRef State,
 
   // Otherwise, fall back to what we know about the region.
   if (const auto *TR = dyn_cast<TypedRegion>(Reg))
-    return DynamicTypeInfo(TR->getLocationType(), /*CanBeSubclass=*/false);
+    return DynamicTypeInfo(TR->getLocationType(), /*CanBeSub=*/false);
 
   if (const auto *SR = dyn_cast<SymbolicRegion>(Reg)) {
     SymbolRef Sym = SR->getSymbol();
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 12094c6a1e6c9..1fef5b3c1edd5 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1568,7 +1568,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
           ProgramStateRef NewState =
             createTemporaryRegionIfNeeded(State, LCtx, OCE->getArg(0));
           if (NewState != State) {
-            Pred = Bldr.generateNode(OCE, Pred, NewState, /*Tag=*/nullptr,
+            Pred = Bldr.generateNode(OCE, Pred, NewState, /*tag=*/nullptr,
                                      ProgramPoint::PreStmtKind);
             // Did we cache out?
             if (!Pred)
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
index e00a08b2162c1..b935e3afe34b2 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
@@ -327,30 +327,30 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) {
       ExplodedNodeSet DstPostPostCallCallback;
       getCheckerManager().runCheckersForPostCall(DstPostPostCallCallback,
                                                  CEENode, *UpdatedCall, *this,
-                                                 /*WasInlined=*/true);
+                                                 /*wasInlined=*/true);
       for (auto I : DstPostPostCallCallback) {
         getCheckerManager().runCheckersForNewAllocator(
             CNE,
             *getObjectUnderConstruction(I->getState(), CNE,
                                         calleeCtx->getParent()),
             DstPostCall, I, *this,
-            /*WasInlined=*/true);
+            /*wasInlined=*/true);
       }
     } else {
       getCheckerManager().runCheckersForPostCall(DstPostCall, CEENode,
                                                  *UpdatedCall, *this,
-                                                 /*WasInlined=*/true);
+                                                 /*wasInlined=*/true);
     }
     ExplodedNodeSet Dst;
     if (const ObjCMethodCall *Msg = dyn_cast<ObjCMethodCall>(Call)) {
       getCheckerManager().runCheckersForPostObjCMessage(Dst, DstPostCall, *Msg,
                                                         *this,
-                                                        /*WasInlined=*/true);
+                                                        /*wasInlined=*/true);
     } else if (CE &&
                !(isa<CXXNewExpr>(CE) && // Called when visiting CXXNewExpr.
                  AMgr.getAnalyzerOptions().MayInlineCXXAllocator)) {
       getCheckerManager().runCheckersForPostStmt(Dst, DstPostCall, CE,
-                                                 *this, /*WasInlined=*/true);
+                                                 *this, /*wasInlined=*/true);
     } else {
       Dst.insert(DstPostCall);
     }
@@ -645,7 +645,7 @@ ProgramStateRef ExprEngine::bindReturnValue(const CallEvent &Call,
     ITraits.setTrait(TargetR,
         RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
     State = State->invalidateRegions(TargetR, E, Count, LCtx,
-                                     /* CausedByPointerEscape=*/false, nullptr,
+                                     /* CausesPointerEscape=*/false, nullptr,
                                      &Call, &ITraits);
 
     R = State->getSVal(Target.castAs<Loc>(), E->getType());
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index a389619f84260..d2aea1fd92dda 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -1253,7 +1253,7 @@ RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K,
   // Bind the globals memory space to a new symbol that we will use to derive
   // the bindings for all globals.
   const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K);
-  SVal V = svalBuilder.conjureSymbolVal(/* SymbolTag = */ (const void*) GS, Ex, LCtx,
+  SVal V = svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void*) GS, Ex, LCtx,
                                         /* type does not matter */ Ctx.IntTy,
                                         Count);
 
diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index aaf29abd47309..84c52f53ca5e7 100644
--- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -525,7 +525,7 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
       case BO_Sub:
         if (resultTy->isIntegralOrEnumerationType())
           return makeIntVal(0, resultTy);
-        return evalCastFromNonLoc(makeIntVal(0, /*Unsigned=*/false), resultTy);
+        return evalCastFromNonLoc(makeIntVal(0, /*isUnsigned=*/false), resultTy);
       case BO_Or:
       case BO_And:
         return evalCastFromNonLoc(lhs, resultTy);
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index d003937825bcd..bb5c0bb711b99 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1606,7 +1606,7 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   // Handle generation of import library from a def file.
   if (!args.hasArg(OPT_INPUT)) {
     fixupExports();
-    createImportLibrary(/*AsLib=*/true);
+    createImportLibrary(/*asLib=*/true);
     return;
   }
 
@@ -1830,7 +1830,7 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   // need to create a .lib file.
   if (!config->exports.empty() || config->dll) {
     fixupExports();
-    createImportLibrary(/*AsLib=*/false);
+    createImportLibrary(/*asLib=*/false);
     assignExportOrdinals();
   }
 
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index 59bdaec2edd8b..edc3b3707c7b6 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -348,7 +348,7 @@ class TemporaryFile {
     // IsVolatileSize=true forces MemoryBuffer to not use mmap().
     return CHECK(MemoryBuffer::getFile(path, /*FileSize=*/-1,
                                        /*RequiresNullTerminator=*/false,
-                                       /*IsVolatileSize=*/true),
+                                       /*IsVolatile=*/true),
                  "could not open " + path);
   }
 
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index f4f52b4b6fa2f..17c5860a0d785 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -272,7 +272,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) {
 // Add a given library by searching it from input search paths.
 void LinkerDriver::addLibrary(StringRef name) {
   if (Optional<std::string> path = searchLibrary(name))
-    addFile(*path, /*WithLOption=*/true);
+    addFile(*path, /*withLOption=*/true);
   else
     error("unable to find library -l" + name);
 }
@@ -1118,7 +1118,7 @@ void LinkerDriver::createFiles(opt::InputArgList &args) {
       addLibrary(arg->getValue());
       break;
     case OPT_INPUT:
-      addFile(arg->getValue(), /*WithLOption=*/false);
+      addFile(arg->getValue(), /*withLOption=*/false);
       break;
     case OPT_defsym: {
       StringRef from;
@@ -1386,7 +1386,7 @@ static void replaceCommonSymbols() {
     bss->markDead();
     inputSections.push_back(bss);
     s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type,
-                       /*Value=*/0, s->size, bss});
+                       /*value=*/0, s->size, bss});
   });
 }
 
@@ -1540,7 +1540,7 @@ template <class ELFT> void LinkerDriver::compileBitcodeFiles() {
 
   for (InputFile *file : lto->compile()) {
     auto *obj = cast<ObjFile<ELFT>>(file);
-    obj->parse(/*IgnoreComdats=*/true);
+    obj->parse(/*ignoreComdats=*/true);
     for (Symbol *sym : obj->getGlobalSymbols())
       sym->parseSymbolVersion();
     objectFiles.push_back(file);
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 89b178decba2a..fda2a544aed88 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -552,11 +552,11 @@ static void addDependentLibrary(StringRef specifier, const InputFile *f) {
   if (!config->dependentLibraries)
     return;
   if (fs::exists(specifier))
-    driver->addFile(specifier, /*WithLOption=*/false);
+    driver->addFile(specifier, /*withLOption=*/false);
   else if (Optional<std::string> s = findFromSearchPaths(specifier))
-    driver->addFile(*s, /*WithLOption=*/true);
+    driver->addFile(*s, /*withLOption=*/true);
   else if (Optional<std::string> s = searchLibraryBaseName(specifier))
-    driver->addFile(*s, /*WithLOption=*/true);
+    driver->addFile(*s, /*withLOption=*/true);
   else
     error(toString(f) +
           ": unable to find library from dependent library specifier: " +
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index e227d0bb7093c..8f0aa660145a4 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -280,26 +280,26 @@ void ScriptParser::addFile(StringRef s) {
     SmallString<128> pathData;
     StringRef path = (config->sysroot + s).toStringRef(pathData);
     if (sys::fs::exists(path)) {
-      driver->addFile(saver.save(path), /*WithLOption=*/false);
+      driver->addFile(saver.save(path), /*withLOption=*/false);
       return;
     }
   }
 
   if (s.startswith("/")) {
-    driver->addFile(s, /*WithLOption=*/false);
+    driver->addFile(s, /*withLOption=*/false);
   } else if (s.startswith("=")) {
     if (config->sysroot.empty())
-      driver->addFile(s.substr(1), /*WithLOption=*/false);
+      driver->addFile(s.substr(1), /*withLOption=*/false);
     else
       driver->addFile(saver.save(config->sysroot + "/" + s.substr(1)),
-                      /*WithLOption=*/false);
+                      /*withLOption=*/false);
   } else if (s.startswith("-l")) {
     driver->addLibrary(s.substr(2));
   } else if (sys::fs::exists(s)) {
-    driver->addFile(s, /*WithLOption=*/false);
+    driver->addFile(s, /*withLOption=*/false);
   } else {
     if (Optional<std::string> path = findFromSearchPaths(s))
-      driver->addFile(saver.save(*path), /*WithLOption=*/true);
+      driver->addFile(saver.save(*path), /*withLOption=*/true);
     else
       setError("unable to find " + s);
   }
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 33a3c87afc105..32e935126e790 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -319,7 +319,7 @@ class PPC64PDLongBranchThunk final : public PPC64LongBranchThunk {
 
 Defined *Thunk::addSymbol(StringRef name, uint8_t type, uint64_t value,
                           InputSectionBase &section) {
-  Defined *d = addSyntheticLocal(name, type, value, /*Size=*/0, section);
+  Defined *d = addSyntheticLocal(name, type, value, /*size=*/0, section);
   syms.push_back(d);
   return d;
 }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 173be72e7fe07..c2d050d9ec855 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1278,12 +1278,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::experimental_vector_reduce_fmin:
       return ConcreteTTI->getMinMaxReductionCost(
           Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
-          /*IsSigned=*/true);
+          /*IsUnsigned=*/true);
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
       return ConcreteTTI->getMinMaxReductionCost(
           Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
-          /*IsSigned=*/false);
+          /*IsUnsigned=*/false);
     case Intrinsic::sadd_sat:
     case Intrinsic::ssub_sat: {
       Type *CondTy = Type::getInt1Ty(RetTy->getContext());
diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp
index e073c84f30904..486720cadd270 100644
--- a/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -27,7 +27,7 @@ ViewEdgeBundles("view-edge-bundles", cl::Hidden,
 char EdgeBundles::ID = 0;
 
 INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
-                /* cfg = */true, /* analysis = */ true)
+                /* cfg = */true, /* is_analysis = */ true)
 
 char &llvm::EdgeBundlesID = EdgeBundles::ID;
 
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c3e9c185be9a4..4d29e883d879c 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -998,7 +998,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
     while (!KilledRegs.empty()) {
       unsigned Reg = KilledRegs.pop_back_val();
       for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
-        if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+        if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
           continue;
         if (TargetRegisterInfo::isVirtualRegister(Reg))
           LV->getVarInfo(Reg).Kills.push_back(&*I);
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 2235689793715..639b588766a14 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3040,7 +3040,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
 
     if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
                             getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
-                            /*AfterBlockPlacement=*/true)) {
+                            /*AfterPlacement=*/true)) {
       // Redo the layout if tail merging creates/removes/moves blocks.
       BlockToChain.clear();
       ComputedEdges.clear();
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 989a6a775afa1..bae3a4333bda7 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -92,7 +92,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
   Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
   Objects.insert(Objects.begin(),
                  StackObject(Size, Alignment, SPOffset, IsImmutable,
-                             /*isSpillSlot=*/false, /*Alloca=*/nullptr,
+                             /*IsSpillSlot=*/false, /*Alloca=*/nullptr,
                              IsAliased));
   return -++NumFixedObjects;
 }
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 9c75d0fd81e0d..4df5ce2dcedc1 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -165,7 +165,7 @@ void MachineFunction::init() {
                       !F.hasFnAttribute("no-realign-stack");
   FrameInfo = new (Allocator) MachineFrameInfo(
       getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP,
-      /*ForceRealign=*/CanRealignSP &&
+      /*ForcedRealign=*/CanRealignSP &&
           F.hasFnAttribute(Attribute::StackAlignment));
 
   if (F.hasFnAttribute(Attribute::StackAlignment))
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 72c4c0d82930f..22c23ba877e88 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -781,7 +781,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
       unsigned Reg = getRegForValue(Val);
       if (!Reg)
         return false;
-      Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+      Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
     }
   }
   return true;
@@ -830,8 +830,8 @@ bool FastISel::selectStackmap(const CallInst *I) {
   const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
   for (unsigned i = 0; ScratchRegs[i]; ++i)
     Ops.push_back(MachineOperand::CreateReg(
-        ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
-        /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+        ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+        /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
 
   // Issue CALLSEQ_START
   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
@@ -941,7 +941,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
     assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
     CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
     CLI.NumResultRegs = 1;
-    Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true));
+    Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
   }
 
   // Add the <id> and <numBytes> constants.
@@ -990,13 +990,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
       unsigned Reg = getRegForValue(I->getArgOperand(i));
       if (!Reg)
         return false;
-      Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+      Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
     }
   }
 
   // Push the arguments from the call instruction.
   for (auto Reg : CLI.OutRegs)
-    Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+    Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
 
   // Push live variables for the stack map.
   if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
@@ -1010,13 +1010,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
   const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
   for (unsigned i = 0; ScratchRegs[i]; ++i)
     Ops.push_back(MachineOperand::CreateReg(
-        ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
-        /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+        ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+        /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
 
   // Add implicit defs (return values).
   for (auto Reg : CLI.InRegs)
-    Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true,
-                                            /*IsImpl=*/true));
+    Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true,
+                                            /*isImp=*/true));
 
   // Insert the patchpoint instruction before the call generated by the target.
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
@@ -1044,9 +1044,9 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
     return true; // don't do anything to this instruction.
   SmallVector<MachineOperand, 8> Ops;
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   MachineInstrBuilder MIB =
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
@@ -1063,11 +1063,11 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) {
     return true; // don't do anything to this instruction.
   SmallVector<MachineOperand, 8> Ops;
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
-                                          /*IsDef=*/false));
+                                          /*isDef=*/false));
   MachineInstrBuilder MIB =
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 8b405562904f3..8b1759246b764 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -151,7 +151,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
           auto Iter = CatchObjects.find(AI);
           if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
             FrameIndex = MF->getFrameInfo().CreateFixedObject(
-                TySize, 0, /*Immutable=*/false, /*isAliased=*/true);
+                TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
             MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
           } else {
             FrameIndex =
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 7c135864766fe..9b28c1a6c4501 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1476,7 +1476,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
       Flags.setZExt();
 
     for (unsigned i = 0; i < NumParts; ++i)
-      Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0));
+      Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
   }
 }
 
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index d97d8e1dec57d..cdf79374e974f 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -1224,14 +1224,14 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
     if (!Load)
       Load = new LoadInst(V->getType(), SpillSlot,
                           Twine(V->getName(), ".wineh.reload"),
-                          /*Volatile=*/false, IncomingBlock->getTerminator());
+                          /*isVolatile=*/false, IncomingBlock->getTerminator());
 
     U.set(Load);
   } else {
     // Reload right before the old use.
     auto *Load = new LoadInst(V->getType(), SpillSlot,
                               Twine(V->getName(), ".wineh.reload"),
-                              /*Volatile=*/false, UsingInst);
+                              /*isVolatile=*/false, UsingInst);
     U.set(Load);
   }
 }
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index eb3d2070e5bcf..c575d6e782b93 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -310,7 +310,7 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
     OS << "value";
 
   OS << " '";
-  V->printAsOperand(OS, /*PrintTy=*/false, M);
+  V->printAsOperand(OS, /*PrintType=*/false, M);
   OS << "'\n";
 }
 
diff --git a/llvm/lib/Support/APSInt.cpp b/llvm/lib/Support/APSInt.cpp
index d2dc860f7384f..7c48880f96eac 100644
--- a/llvm/lib/Support/APSInt.cpp
+++ b/llvm/lib/Support/APSInt.cpp
@@ -22,18 +22,18 @@ APSInt::APSInt(StringRef Str) {
 
   // (Over-)estimate the required number of bits.
   unsigned NumBits = ((Str.size() * 64) / 19) + 2;
-  APInt Tmp(NumBits, Str, /*Radix=*/10);
+  APInt Tmp(NumBits, Str, /*radix=*/10);
   if (Str[0] == '-') {
     unsigned MinBits = Tmp.getMinSignedBits();
     if (MinBits > 0 && MinBits < NumBits)
       Tmp = Tmp.trunc(MinBits);
-    *this = APSInt(Tmp, /*IsUnsigned=*/false);
+    *this = APSInt(Tmp, /*isUnsigned=*/false);
     return;
   }
   unsigned ActiveBits = Tmp.getActiveBits();
   if (ActiveBits > 0 && ActiveBits < NumBits)
     Tmp = Tmp.trunc(ActiveBits);
-  *this = APSInt(Tmp, /*IsUnsigned=*/true);
+  *this = APSInt(Tmp, /*isUnsigned=*/true);
 }
 
 void APSInt::Profile(FoldingSetNodeID& ID) const {
diff --git a/llvm/lib/Support/LowLevelType.cpp b/llvm/lib/Support/LowLevelType.cpp
index cffcfff7c735e..fe77cb3db4139 100644
--- a/llvm/lib/Support/LowLevelType.cpp
+++ b/llvm/lib/Support/LowLevelType.cpp
@@ -17,14 +17,14 @@ using namespace llvm;
 
 LLT::LLT(MVT VT) {
   if (VT.isVector()) {
-    init(/*isPointer=*/false, VT.getVectorNumElements() > 1,
+    init(/*IsPointer=*/false, VT.getVectorNumElements() > 1,
          VT.getVectorNumElements(), VT.getVectorElementType().getSizeInBits(),
          /*AddressSpace=*/0);
   } else if (VT.isValid()) {
     // Aggregates are no different from real scalars as far as GlobalISel is
     // concerned.
     assert(VT.getSizeInBits() != 0 && "invalid zero-sized type");
-    init(/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0,
+    init(/*IsPointer=*/false, /*IsVector=*/false, /*NumElements=*/0,
          VT.getSizeInBits(), /*AddressSpace=*/0);
   } else {
     IsPointer = false;
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 4124121b86b1f..2baccaa0cbd7a 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -612,7 +612,7 @@ raw_fd_ostream::~raw_fd_ostream() {
   // destructing raw_ostream objects which may have errors.
   if (has_error())
     report_fatal_error("IO failure on output stream: " + error().message(),
-                       /*GenCrashDiag=*/false);
+                       /*gen_crash_diag=*/false);
 }
 
 #if defined(_WIN32)
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 911946111791b..8dc2768b95976 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2365,7 +2365,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
                                         AArch64::sub_32);
 
   if ((BW < 32) && !IsBitTest)
-    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true);
+    SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
 
   // Emit the combined compare and branch instruction.
   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
@@ -4272,7 +4272,7 @@ unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
   const TargetRegisterClass *RC =
       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
   if (NeedTrunc) {
-    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false);
+    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
     Op0IsKill = Op1IsKill = true;
   }
@@ -4952,7 +4952,7 @@ std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
   MVT PtrVT = TLI.getPointerTy(DL);
   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
   if (IdxVT.bitsLT(PtrVT)) {
-    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false);
+    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
     IdxNIsKill = true;
   } else if (IdxVT.bitsGT(PtrVT))
     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 50871e1a0f150..f7231471c1077 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -119,11 +119,11 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
       auto T = ArrayType::get(Type::getInt64Ty(C), 2);
       auto *GV = new GlobalVariable(
           M, T,
-          /*IsConstant=*/false, GlobalValue::ExternalLinkage,
+          /*isConstant=*/false, GlobalValue::ExternalLinkage,
           /*Initializer=*/Constant::getNullValue(T), RuntimeHandle,
           /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
           AMDGPUAS::GLOBAL_ADDRESS,
-          /*IsExternallyInitialized=*/false);
+          /*isExternallyInitialized=*/false);
       LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
 
       for (auto U : F.users()) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4eb8e0738a900..09b78115f2e3c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2259,7 +2259,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         unsigned TargetFlags = GV->hasDLLImportStorageClass()
                                    ? ARMII::MO_DLLIMPORT
                                    : ARMII::MO_NO_FLAG;
-        Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
+        Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0,
                                             TargetFlags);
         if (GV->hasDLLImportStorageClass())
           Callee =
@@ -2914,7 +2914,7 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
     auto M = const_cast<Module*>(DAG.getMachineFunction().
                                  getFunction().getParent());
     auto GV = new GlobalVariable(
-                    *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
+                    *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
                     Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
                     Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
                     Twine(AFI->createPICLabelUId())
@@ -3467,7 +3467,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
   // FIXME: Once remat is capable of dealing with instructions with register
   // operands, expand this into two nodes.
   Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
-                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+                       DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
                                                   TargetFlags));
   if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
     Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
index d983ef2fc4f10..4313fa5a82b54 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp
@@ -34,7 +34,7 @@ class LanaiELFObjectWriter : public MCELFObjectTargetWriter {
 
 LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI)
     : MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI,
-                              /*HasRelocationAddend=*/true) {}
+                              /*HasRelocationAddend_=*/true) {}
 
 unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/,
                                             const MCValue & /*Target*/,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 13bd7ee4be956..8d8ba5644e103 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -36,8 +36,8 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter {
 } // end anonymous namespace
 
 SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
-  : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
-                            /*HasRelocationAddend=*/ true) {}
+  : MCELFObjectTargetWriter(/*Is64Bit_=*/true, OSABI, ELF::EM_S390,
+                            /*HasRelocationAddend_=*/ true) {}
 
 // Return the relocation type for an absolute value of MCFixupKind Kind.
 static unsigned getAbsoluteReloc(unsigned Kind) {
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index a439b724d9674..b5d4d369b7265 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -194,7 +194,7 @@ static std::string toString(const APFloat &FP) {
   static const size_t BufBytes = 128;
   char Buf[BufBytes];
   auto Written = FP.convertToHexString(
-      Buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven);
+      Buf, /*HexDigits=*/0, /*UpperCase=*/false, APFloat::rmNearestTiesToEven);
   (void)Written;
   assert(Written != 0);
   assert(Written < BufBytes);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 1a24f749b5644..312b203859d51 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -115,7 +115,7 @@ class WebAssemblyFastISel final : public FastISel {
 private:
   // Utility helper routines
   MVT::SimpleValueType getSimpleType(Type *Ty) {
-    EVT VT = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
+    EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
     return VT.isSimple() ? VT.getSimpleVT().SimpleTy
                          : MVT::INVALID_SIMPLE_VALUE_TYPE;
   }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 0bfebc32a820e..ea9cfc00adfdb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -81,7 +81,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
     if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
       MI.getOperand(OffsetOperandNum).setImm(Offset);
       MI.getOperand(FIOperandNum)
-          .ChangeToRegister(FrameRegister, /*IsDef=*/false);
+          .ChangeToRegister(FrameRegister, /*isDef=*/false);
       return;
     }
   }
@@ -102,7 +102,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
           MachineOperand &ImmMO = Def->getOperand(1);
           ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
           MI.getOperand(FIOperandNum)
-              .ChangeToRegister(FrameRegister, /*IsDef=*/false);
+              .ChangeToRegister(FrameRegister, /*isDef=*/false);
           return;
         }
       }
@@ -127,7 +127,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
         .addReg(FrameRegister)
         .addReg(OffsetOp);
   }
-  MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
+  MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*isDef=*/false);
 }
 
 Register
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 31cd83d942096..7b9ce02712053 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -289,7 +289,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
 }
 
 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
-  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
+  EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
   if (evt == MVT::Other || !evt.isSimple())
     // Unhandled type. Halt "fast" selection and bail.
     return false;
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index f50e91fdabe9d..e310fe0691171 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -3170,7 +3170,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
   MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
   int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
   int UnwindHelpFI =
-      MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false);
+      MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
   EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
 
   // Store -2 into UnwindHelp on function entry. We have to scan forwards past
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 34a85806f563e..23926ca80527d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3021,7 +3021,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
       // load from our portion of it. This assumes that if the first part of an
       // argument is in memory, the rest will also be in memory.
       int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
-                                     /*Immutable=*/false);
+                                     /*IsImmutable=*/false);
       PartAddr = DAG.getFrameIndex(FI, PtrVT);
       return DAG.getLoad(
           ValVT, dl, Chain, PartAddr,
@@ -23719,7 +23719,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
       // Set up a frame object for the return address.
       unsigned SlotSize = RegInfo->getSlotSize();
       FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
-          SlotSize, /*Offset=*/0, /*IsImmutable=*/false);
+          SlotSize, /*SPOffset=*/0, /*IsImmutable=*/false);
       FuncInfo->setFAIndex(FrameAddrIndex);
     }
     return DAG.getFrameIndex(FrameAddrIndex, VT);
diff --git a/llvm/lib/Target/X86/X86WinAllocaExpander.cpp b/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
index 22d168c1e391c..9e499db1d7ee7 100644
--- a/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -250,7 +250,7 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
 
       // Do the probe.
       STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL,
-                                              /*InPrologue=*/false);
+                                              /*InProlog=*/false);
     } else {
       // Sub
       BuildMI(*MBB, I, DL,
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index e6f501e5d196c..692697d6f32e8 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -113,7 +113,7 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) {
     StructType *FrameTy = StructType::create(C, "NoopCoro.Frame");
     auto *FramePtrTy = FrameTy->getPointerTo();
     auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
-                                   /*IsVarArgs=*/false);
+                                   /*isVarArg=*/false);
     auto *FnPtrTy = FnTy->getPointerTo();
     FrameTy->setBody({FnPtrTy, FnPtrTy});
 
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 174430da171f6..58bf22bee29b4 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -378,7 +378,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
   StructType *FrameTy = StructType::create(C, Name);
   auto *FramePtrTy = FrameTy->getPointerTo();
   auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
-                                 /*IsVarArgs=*/false);
+                                 /*isVarArg=*/false);
   auto *FnPtrTy = FnTy->getPointerTo();
 
   // Figure out how wide should be an integer type storing the suspend index.
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 8afb2f0ff115e..5458e70ff16ad 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -866,7 +866,7 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
 
   LLVMContext &C = M.getContext();
   auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
-                                 /*IsVarArgs=*/false);
+                                 /*isVarArg=*/false);
   Function *DevirtFn =
       Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
                        CORO_DEVIRT_TRIGGER_FN, &M);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3a8ec1ecd38b9..2b9859b602f49 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -967,7 +967,7 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
     // Can it be decomposed into  icmp eq (X & Mask), 0  ?
     if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
                                    Pred, X, UnsetBitsMask,
-                                   /*LookThruTrunc=*/false) &&
+                                   /*LookThroughTrunc=*/false) &&
         Pred == ICmpInst::ICMP_EQ)
       return true;
     // Is it  icmp eq (X & Mask), 0  already?
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 73172877d9ecc..cc753ce05313e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -624,7 +624,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
   if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
     return false;
 
-  APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
+  APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
   if (IsSigned)
     APInt::sdivrem(C1, C2, Quotient, Remainder);
   else
@@ -661,7 +661,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     // (X / C1) / C2  -> X / (C1*C2)
     if ((IsSigned && match(Op0, m_SDiv(m_Value(X), m_APInt(C1)))) ||
         (!IsSigned && match(Op0, m_UDiv(m_Value(X), m_APInt(C1))))) {
-      APInt Product(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+      APInt Product(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
       if (!multiplyOverflows(*C1, *C2, Product, IsSigned))
         return BinaryOperator::Create(I.getOpcode(), X,
                                       ConstantInt::get(Ty, Product));
@@ -669,7 +669,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
 
     if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
         (!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) {
-      APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+      APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
 
       // (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
       if (isMultiple(*C2, *C1, Quotient, IsSigned)) {
@@ -693,7 +693,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
          *C1 != C1->getBitWidth() - 1) ||
         (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))))) {
-      APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+      APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
       APInt C1Shifted = APInt::getOneBitSet(
           C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
 
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 450ae2f79026e..22e8b4ee2e298 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -354,7 +354,7 @@ void HWAddressSanitizer::initializeModule(Module &M) {
 
   if (!TargetTriple.isAndroid()) {
     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
-      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstantGlobal=*/false,
+      auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
                                     GlobalValue::ExternalLinkage, nullptr,
                                     "__hwasan_tls", nullptr,
                                     GlobalVariable::InitialExecTLSModel);
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 8ec071536ec2d..89497177524fb 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -541,7 +541,7 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) {
   // Find the smallest power of two bitwidth that's sufficient to hold Instr's
   // operands.
   auto OrigWidth = Instr->getType()->getIntegerBitWidth();
-  ConstantRange OperandRange(OrigWidth, /*isFullset=*/false);
+  ConstantRange OperandRange(OrigWidth, /*isFullSet=*/false);
   for (Value *Operand : Instr->operands()) {
     OperandRange = OperandRange.unionWith(
         LVI->getConstantRange(Operand, Instr->getParent()));
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index a0935efa264be..4f83e869b3032 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -436,7 +436,7 @@ Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
     } else if (Instruction *VI = dyn_cast<Instruction>(V)) {
       NewOperands.push_back(convert(VI, ToTy));
     } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
-      APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false);
+      APSInt Val(ToTy->getPrimitiveSizeInBits(), /*isUnsigned=*/false);
       bool Exact;
       CF->getValueAPF().convertToInteger(Val,
                                          APFloat::rmNearestTiesToEven,
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 6590f1d387f6a..59a387a186b83 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3125,7 +3125,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
   MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
   int64_t IncOffset = IncConst->getValue()->getSExtValue();
   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
-                        IncOffset, /*HaseBaseReg=*/false))
+                        IncOffset, /*HasBaseReg=*/false))
     return false;
 
   return true;
diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 8062fe4990832..8256e3b5f5afd 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -494,7 +494,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
     KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, AC, SI);
     // TODO Shouldn't this create a signed range?
     ConstantRange KnownBitsRange =
-        ConstantRange::fromKnownBits(Known, /*ForSigned=*/false);
+        ConstantRange::fromKnownBits(Known, /*IsSigned=*/false);
     const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI);
     ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange);
     // We delegate removal of unreachable non-default cases to other passes. In
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3e301d7c548eb..11651d040dc0d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5025,7 +5025,7 @@ SwitchLookupTable::SwitchLookupTable(
   ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
   Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
 
-  Array = new GlobalVariable(M, ArrayTy, /*constant=*/true,
+  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
                              GlobalVariable::PrivateLinkage, Initializer,
                              "switch.table." + FuncName);
   Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);

From 1ffceaa54361af7120c87656bd949880d5426355 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Tue, 16 Jul 2019 04:56:43 +0000
Subject: [PATCH 209/451] [RISCV] Match GNU tools canonical JALR and add
 aliases

The canonical GNU form of JALR resembles a load/store instruction rather
than placing the immediate offset as a separate argument, so match this
behaviour. Also add parser-only aliases for the three-operand form, and
add other shorter aliases also emitted by GNU tools.

Differential Revision: https://reviews.llvm.org/D55277
Patch by James Clarke.

llvm-svn: 366179
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.td      | 25 ++++++++++++-----
 llvm/test/CodeGen/RISCV/branch-relaxation.ll |  3 ++-
 llvm/test/CodeGen/RISCV/indirectbr.ll        |  2 +-
 llvm/test/CodeGen/RISCV/option-rvc.ll        |  2 +-
 llvm/test/MC/RISCV/compress-rv32i.s          |  4 +--
 llvm/test/MC/RISCV/fixups.s                  |  6 ++---
 llvm/test/MC/RISCV/rv32e-valid.s             |  2 +-
 llvm/test/MC/RISCV/rv32i-valid.s             | 24 +++++++----------
 llvm/test/MC/RISCV/rvi-aliases-valid.s       | 28 ++++++++++++++++----
 9 files changed, 60 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b017307b46173..69bde15f12187 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -357,7 +357,7 @@ def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
 let isCall = 1 in
 def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd),
                    (ins GPR:$rs1, simm12:$imm12),
-                   "jalr", "$rd, $rs1, $imm12">;
+                   "jalr", "$rd, ${imm12}(${rs1})">;
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 
 def BEQ  : BranchCC_rri<0b000, "beq">;
@@ -597,12 +597,23 @@ def : InstAlias<"bgtu $rs, $rt, $offset",
 def : InstAlias<"bleu $rs, $rt, $offset",
                 (BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>;
 
-// "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction.
-def : InstAlias<"j $offset",   (JAL  X0, simm21_lsb0_jal:$offset)>;
-def : InstAlias<"jal $offset", (JAL  X1, simm21_lsb0_jal:$offset)>;
-def : InstAlias<"jr $rs",      (JALR X0, GPR:$rs, 0)>;
-def : InstAlias<"jalr $rs",    (JALR X1, GPR:$rs, 0)>;
-def : InstAlias<"ret",         (JALR X0,      X1, 0), 2>;
+def : InstAlias<"j $offset",   (JAL X0, simm21_lsb0_jal:$offset)>;
+def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>;
+
+// Non-zero offset aliases of "jalr" are the lowest weight, followed by the
+// two-register form, then the one-register forms and finally "ret".
+def : InstAlias<"jr $rs",                (JALR      X0, GPR:$rs, 0), 3>;
+def : InstAlias<"jr ${offset}(${rs})",   (JALR      X0, GPR:$rs, simm12:$offset)>;
+def : InstAlias<"jalr $rs",              (JALR      X1, GPR:$rs, 0), 3>;
+def : InstAlias<"jalr ${offset}(${rs})", (JALR      X1, GPR:$rs, simm12:$offset)>;
+def : InstAlias<"jalr $rd, $rs",         (JALR GPR:$rd, GPR:$rs, 0), 2>;
+def : InstAlias<"ret",                   (JALR      X0,      X1, 0), 4>;
+
+// Non-canonical forms for jump targets also accepted by the assembler.
+def : InstAlias<"jr $rs, $offset",        (JALR      X0, GPR:$rs, simm12:$offset), 0>;
+def : InstAlias<"jalr $rs, $offset",      (JALR      X1, GPR:$rs, simm12:$offset), 0>;
+def : InstAlias<"jalr $rd, $rs, $offset", (JALR GPR:$rd, GPR:$rs, simm12:$offset), 0>;
+
 // TODO call
 // TODO tail
 
diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
index cd589dd9cab37..56f0f27a06488 100644
--- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll
+++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
@@ -25,6 +25,7 @@ tail:
   ret void
 }
 
+; TODO: Extend simm12's MCOperandPredicate so the jalr zero is printed as a jr.
 define i32 @relax_jal(i1 %a) nounwind {
 ; CHECK-LABEL: relax_jal:
 ; CHECK:       # %bb.0:
@@ -32,7 +33,7 @@ define i32 @relax_jal(i1 %a) nounwind {
 ; CHECK-NEXT:    bnez a0, .LBB1_1
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    lui a0, %hi(.LBB1_2)
-; CHECK-NEXT:    jalr zero, a0, %lo(.LBB1_2)
+; CHECK-NEXT:    jalr zero, %lo(.LBB1_2)(a0)
 ; CHECK-NEXT:  .LBB1_1: # %iftrue
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
diff --git a/llvm/test/CodeGen/RISCV/indirectbr.ll b/llvm/test/CodeGen/RISCV/indirectbr.ll
index 1d916b8e10846..e734de3c8e496 100644
--- a/llvm/test/CodeGen/RISCV/indirectbr.ll
+++ b/llvm/test/CodeGen/RISCV/indirectbr.ll
@@ -25,7 +25,7 @@ define i32 @indirectbr_with_offset(i8* %a) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw ra, 12(sp)
-; RV32I-NEXT:    jalr zero, a0, 1380
+; RV32I-NEXT:    jr 1380(a0)
 ; RV32I-NEXT:  .LBB1_1:
 ; RV32I-NEXT:    mv a0, zero
 ; RV32I-NEXT:    lw ra, 12(sp)
diff --git a/llvm/test/CodeGen/RISCV/option-rvc.ll b/llvm/test/CodeGen/RISCV/option-rvc.ll
index 3c207bd424fa6..a0f09c052a49b 100644
--- a/llvm/test/CodeGen/RISCV/option-rvc.ll
+++ b/llvm/test/CodeGen/RISCV/option-rvc.ll
@@ -8,7 +8,7 @@
 define i32 @add(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: add:
 ; CHECK:    add a0, a1, a0
-; CHECK-NEXT:    jalr zero, ra, 0
+; CHECK-NEXT:    jalr zero, 0(ra)
   tail call void asm sideeffect ".option rvc", ""()
   %add = add nsw i32 %b, %a
   ret i32 %add
diff --git a/llvm/test/MC/RISCV/compress-rv32i.s b/llvm/test/MC/RISCV/compress-rv32i.s
index 149279c433044..1f8835faf35de 100644
--- a/llvm/test/MC/RISCV/compress-rv32i.s
+++ b/llvm/test/MC/RISCV/compress-rv32i.s
@@ -168,7 +168,7 @@ lw ra, 252(sp)
 # CHECK-ALIAS: ret
 # CHECK-INST: c.jr ra
 # CHECK: # encoding:  [0x82,0x80]
-jalr zero, ra, 0
+jalr zero, 0(ra)
 
 # CHECK-BYTES: 92 80
 # CHECK-ALIAS: add ra, zero, tp
@@ -192,7 +192,7 @@ ebreak
 # CHECK-ALIAS: jalr s0
 # CHECK-INST: c.jalr s0
 # CHECK: # encoding: [0x02,0x94]
-jalr ra, s0, 0
+jalr ra, 0(s0)
 
 # CHECK-BYTES: 3e 94
 # CHECK-ALIAS: add s0, s0, a5
diff --git a/llvm/test/MC/RISCV/fixups.s b/llvm/test/MC/RISCV/fixups.s
index f0377debabb9e..ca0ee131fb8af 100644
--- a/llvm/test/MC/RISCV/fixups.s
+++ b/llvm/test/MC/RISCV/fixups.s
@@ -68,16 +68,16 @@ func:
 call func
 # CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_riscv_call
 # CHECK-INSTR: auipc   ra, 0
-# CHECK-INSTR: jalr    ra, ra, -100
+# CHECK-INSTR: jalr    ra, -100(ra)
 
 .fill 10000
 call func
 # CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_riscv_call
 # CHECK-INSTR: auipc   ra, 1048574
-# CHECK-INSTR: jalr    ra, ra, -1916
+# CHECK-INSTR: jalr    ra, -1916(ra)
 
 .fill 20888
 call func
 # CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_riscv_call
 # CHECK-INSTR: auipc   ra, 1048568
-# CHECK-INSTR: jalr    ra, ra, 1764
+# CHECK-INSTR: jalr    ra, 1764(ra)
diff --git a/llvm/test/MC/RISCV/rv32e-valid.s b/llvm/test/MC/RISCV/rv32e-valid.s
index 25419768ad96a..42b85fb0535e6 100644
--- a/llvm/test/MC/RISCV/rv32e-valid.s
+++ b/llvm/test/MC/RISCV/rv32e-valid.s
@@ -14,7 +14,7 @@ auipc x1, 2
 
 # CHECK-ASM-AND-OBJ: jal sp, 4
 jal x2, 4
-# CHECK-ASM-AND-OBJ: jalr gp, gp, 4
+# CHECK-ASM-AND-OBJ: jalr gp, 4(gp)
 jalr x3, x3, 4
 
 # CHECK-ASM-AND-OBJ: beq tp, t0, 8
diff --git a/llvm/test/MC/RISCV/rv32i-valid.s b/llvm/test/MC/RISCV/rv32i-valid.s
index 3611d71ac4309..23ed9a2b4e847 100644
--- a/llvm/test/MC/RISCV/rv32i-valid.s
+++ b/llvm/test/MC/RISCV/rv32i-valid.s
@@ -94,27 +94,21 @@ jal s0, (0xff-99)
 # CHECK-OBJ: jal zero, 0
 jal zero, .
 
-# CHECK-ASM-AND-OBJ: jalr a0, a1, -2048
+# CHECK-ASM-AND-OBJ: jalr a0, -2048(a1)
 # CHECK-ASM: encoding: [0x67,0x85,0x05,0x80]
-jalr a0, a1, -2048
-# CHECK-ASM-AND-OBJ: jalr a0, a1, -2048
+jalr a0, -2048(a1)
+# CHECK-ASM-AND-OBJ: jalr a0, -2048(a1)
 # CHECK-ASM: encoding: [0x67,0x85,0x05,0x80]
-jalr a0, a1, ~2047
-# CHECK-ASM-AND-OBJ: jalr a0, a1, 0
-# CHECK-ASM: encoding: [0x67,0x85,0x05,0x00]
-jalr a0, a1, !1
-# CHECK-ASM-AND-OBJ: jalr a0, a1, -2048
-# CHECK-ASM: encoding: [0x67,0x85,0x05,0x80]
-jalr a0, a1, %lo(2048)
-# CHECK-ASM-AND-OBJ: jalr t2, t1, 2047
+jalr a0, %lo(2048)(a1)
+# CHECK-ASM-AND-OBJ: jalr t2, 2047(t1)
 # CHECK-ASM: encoding: [0xe7,0x03,0xf3,0x7f]
-jalr t2, t1, 2047
-# CHECK-ASM-AND-OBJ: jalr sp, zero, 256
+jalr t2, 2047(t1)
+# CHECK-ASM-AND-OBJ: jalr sp, 256(zero)
 # CHECK-ASM: encoding: [0x67,0x01,0x00,0x10]
 jalr sp, zero, 256
-# CHECK-ASM-AND-OBJ: jalr a1, a2, 30
+# CHECK-ASM-AND-OBJ: jalr a1, 30(a2)
 # CHECK-ASM: encoding: [0xe7,0x05,0xe6,0x01]
-jalr a1, a2, CONST
+jalr a1, CONST(a2)
 
 # CHECK-ASM-AND-OBJ: beq s1, s1, 102
 # CHECK-ASM: encoding: [0x63,0x83,0x94,0x06]
diff --git a/llvm/test/MC/RISCV/rvi-aliases-valid.s b/llvm/test/MC/RISCV/rvi-aliases-valid.s
index a3050aa46f621..71a50ec95aa41 100644
--- a/llvm/test/MC/RISCV/rvi-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rvi-aliases-valid.s
@@ -139,13 +139,31 @@ jal foo
 # CHECK-OBJ: jal 0
 # CHECK-OBJ: R_RISCV_JAL a0
 jal a0
-# CHECK-S-OBJ-NOALIAS: jalr zero, s4, 0
+# CHECK-S-OBJ-NOALIAS: jalr zero, 0(s4)
 # CHECK-S-OBJ: jr s4
 jr x20
-# CHECK-S-OBJ-NOALIAS: jalr ra, s5, 0
-# CHECK-S-OBJ: jalr s5
-jalr x21
-# CHECK-S-OBJ-NOALIAS: jalr zero, ra, 0
+# CHECK-S-OBJ-NOALIAS: jalr zero, 6(s5)
+# CHECK-S-OBJ: jr 6(s5)
+jr 6(x21)
+# CHECK-S-OBJ-NOALIAS: jalr zero, 7(s6)
+# CHECK-S-OBJ: jr 7(s6)
+jr x22, 7
+# CHECK-S-OBJ-NOALIAS: jalr ra, 0(s4)
+# CHECK-S-OBJ: jalr s4
+jalr x20
+# CHECK-S-OBJ-NOALIAS: jalr ra, 8(s5)
+# CHECK-S-OBJ: jalr 8(s5)
+jalr 8(x21)
+# CHECK-S-OBJ-NOALIAS: jalr s6, 0(s7)
+# CHECK-S-OBJ: jalr s6, s7
+jalr x22, x23
+# CHECK-S-OBJ-NOALIAS: jalr ra, 9(s8)
+# CHECK-S-OBJ: jalr 9(s8)
+jalr x24, 9
+# CHECK-S-OBJ-NOALIAS: jalr s9, 11(s10)
+# CHECK-S-OBJ: jalr s9, 11(s10)
+jalr x25, x26, 11
+# CHECK-S-OBJ-NOALIAS: jalr zero, 0(ra)
 # CHECK-S-OBJ: ret
 ret
 # TODO call

From 47cfe8f321515418ce8d1c00708bebdef1c330eb Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Tue, 16 Jul 2019 05:50:45 +0000
Subject: [PATCH 210/451] [ELF] Fix variable names in comments after
 VariableName -> variableName change

Also fix some typos.

llvm-svn: 366181
---
 lld/ELF/AArch64ErrataFix.cpp                  | 20 +++++------
 lld/ELF/AArch64ErrataFix.h                    |  2 +-
 lld/ELF/Arch/AArch64.cpp                      |  4 +--
 lld/ELF/Arch/ARM.cpp                          | 12 +++----
 lld/ELF/Arch/RISCV.cpp                        |  2 +-
 lld/ELF/CallGraphSort.cpp                     |  6 ++--
 lld/ELF/Config.h                              |  2 +-
 lld/ELF/Driver.cpp                            | 18 +++++-----
 lld/ELF/InputFiles.cpp                        | 12 +++----
 lld/ELF/InputFiles.h                          |  4 +--
 lld/ELF/InputSection.cpp                      |  6 ++--
 lld/ELF/InputSection.h                        |  4 +--
 lld/ELF/LTO.cpp                               |  6 ++--
 lld/ELF/LinkerScript.cpp                      |  6 ++--
 lld/ELF/Relocations.cpp                       | 24 ++++++-------
 lld/ELF/ScriptParser.h                        |  2 +-
 lld/ELF/SymbolTable.cpp                       |  4 +--
 lld/ELF/SyntheticSections.cpp                 | 36 +++++++++----------
 lld/ELF/SyntheticSections.h                   |  8 ++---
 lld/ELF/Target.h                              |  2 +-
 lld/ELF/Thunks.cpp                            |  4 +--
 lld/ELF/Writer.cpp                            |  8 ++---
 lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s |  2 +-
 lld/test/ELF/gdb-index-multiple-cu-2.s        |  4 +--
 lld/test/ELF/gdb-index-multiple-cu.s          | 12 +++----
 25 files changed, 104 insertions(+), 106 deletions(-)

diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp
index 7473ab61c4567..b2eda4dcbc4e9 100644
--- a/lld/ELF/AArch64ErrataFix.cpp
+++ b/lld/ELF/AArch64ErrataFix.cpp
@@ -413,8 +413,8 @@ void lld::elf::Patch843419Section::writeTo(uint8_t *buf) {
   write32le(buf, read32le(patchee->data().begin() + patcheeOffset));
 
   // Apply any relocation transferred from the original PatcheeSection.
-  // For a SyntheticSection Buf already has OutSecOff added, but relocateAlloc
-  // also adds OutSecOff so we need to subtract to avoid double counting.
+  // For a SyntheticSection Buf already has outSecOff added, but relocateAlloc
+  // also adds outSecOff so we need to subtract to avoid double counting.
   this->relocateAlloc(buf - outSecOff, buf - outSecOff + getSize());
 
   // Return address is the next instruction after the one we have just copied.
@@ -427,7 +427,7 @@ void AArch64Err843419Patcher::init() {
   // The AArch64 ABI permits data in executable sections. We must avoid scanning
   // this data as if it were instructions to avoid false matches. We use the
   // mapping symbols in the InputObjects to identify this data, caching the
-  // results in SectionMap so we don't have to recalculate it each pass.
+  // results in sectionMap so we don't have to recalculate it each pass.
 
   // The ABI Section 4.5.4 Mapping symbols; defines local symbols that describe
   // half open intervals [Symbol Value, Next Symbol Value) of code and data
@@ -489,7 +489,7 @@ void AArch64Err843419Patcher::insertPatches(
   uint64_t patchUpperBound = prevIsecLimit + target->getThunkSectionSpacing();
   uint64_t outSecAddr = isd.sections.front()->getParent()->addr;
 
-  // Set the OutSecOff of patches to the place where we want to insert them.
+  // Set the outSecOff of patches to the place where we want to insert them.
   // We use a similar strategy to Thunk placement. Place patches roughly
   // every multiple of maximum branch range.
   auto patchIt = patches.begin();
@@ -511,10 +511,10 @@ void AArch64Err843419Patcher::insertPatches(
     (*patchIt)->outSecOff = isecLimit;
   }
 
-  // merge all patch sections. We use the OutSecOff assigned above to
+  // merge all patch sections. We use the outSecOff assigned above to
   // determine the insertion point. This is ok as we only merge into an
   // InputSectionDescription once per pass, and at the end of the pass
-  // assignAddresses() will recalculate all the OutSecOff values.
+  // assignAddresses() will recalculate all the outSecOff values.
   std::vector<InputSection *> tmp;
   tmp.reserve(isd.sections.size() + patches.size());
   auto mergeCmp = [](const InputSection *a, const InputSection *b) {
@@ -530,8 +530,8 @@ void AArch64Err843419Patcher::insertPatches(
   isd.sections = std::move(tmp);
 }
 
-// Given an erratum sequence that starts at address AdrpAddr, with an
-// instruction that we need to patch at PatcheeOffset from the start of
+// Given an erratum sequence that starts at address adrpAddr, with an
+// instruction that we need to patch at patcheeOffset from the start of
 // InputSection IS, create a Patch843419 Section and add it to the
 // Patches that we need to insert.
 static void implementPatch(uint64_t adrpAddr, uint64_t patcheeOffset,
@@ -587,10 +587,10 @@ AArch64Err843419Patcher::patchInputSectionDescription(
     //  LLD doesn't use the erratum sequence in SyntheticSections.
     if (isa<SyntheticSection>(isec))
       continue;
-    // Use SectionMap to make sure we only scan code and not inline data.
+    // Use sectionMap to make sure we only scan code and not inline data.
     // We have already sorted MapSyms in ascending order and removed consecutive
     // mapping symbols of the same type. Our range of executable instructions to
-    // scan is therefore [CodeSym->Value, DataSym->Value) or [CodeSym->Value,
+    // scan is therefore [codeSym->value, dataSym->value) or [codeSym->value,
     // section size).
     std::vector<const Defined *> &mapSyms = sectionMap[isec];
 
diff --git a/lld/ELF/AArch64ErrataFix.h b/lld/ELF/AArch64ErrataFix.h
index e4752e7bb8a9c..0548b58751ff9 100644
--- a/lld/ELF/AArch64ErrataFix.h
+++ b/lld/ELF/AArch64ErrataFix.h
@@ -36,7 +36,7 @@ class AArch64Err843419Patcher {
 
   void init();
 
-  // A cache of the mapping symbols defined by the InputSecion sorted in order
+  // A cache of the mapping symbols defined by the InputSection sorted in order
   // of ascending value with redundant symbols removed. These describe
   // the ranges of code and data in an executable InputSection.
   std::map<InputSection *, std::vector<const Defined *>> sectionMap;
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index 9b6599be38fc1..4d4789702f03d 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -517,7 +517,7 @@ void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
   uint64_t plt = in.plt->getVA();
 
   if (btiHeader) {
-    // PltHeader is called indirectly by Plt[N]. Prefix PltData with a BTI C
+    // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
     // instruction.
     memcpy(buf, btiData, sizeof(btiData));
     buf += sizeof(btiData);
@@ -538,7 +538,7 @@ void AArch64BtiPac::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
                              uint64_t pltEntryAddr, int32_t index,
                              unsigned relOff) const {
   // The PLT entry is of the form:
-  // [BtiData] AddrInst (PacBr | StdBr) [NopData]
+  // [btiData] addrInst (pacBr | stdBr) [nopData]
   const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
   const uint8_t addrInst[] = {
       0x10, 0x00, 0x00, 0x90,  // adrp x16, Page(&(.plt.got[n]))
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index b69974fc72c67..64adc33c07ae2 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -299,13 +299,13 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
 
 uint32_t ARM::getThunkSectionSpacing() const {
   // The placing of pre-created ThunkSections is controlled by the value
-  // ThunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to
+  // thunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to
   // place the ThunkSection such that all branches from the InputSections
   // prior to the ThunkSection can reach a Thunk placed at the end of the
   // ThunkSection. Graphically:
-  // | up to ThunkSectionSpacing .text input sections |
+  // | up to thunkSectionSpacing .text input sections |
   // | ThunkSection                                   |
-  // | up to ThunkSectionSpacing .text input sections |
+  // | up to thunkSectionSpacing .text input sections |
   // | ThunkSection                                   |
 
   // Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This
@@ -316,14 +316,14 @@ uint32_t ARM::getThunkSectionSpacing() const {
   // Thumb B<cc>.W range +/- 1MiB
   // If a branch cannot reach a pre-created ThunkSection a new one will be
   // created so we can handle the rare cases of a Thumb 2 conditional branch.
-  // We intentionally use a lower size for ThunkSectionSpacing than the maximum
+  // We intentionally use a lower size for thunkSectionSpacing than the maximum
   // branch range so the end of the ThunkSection is more likely to be within
   // range of the branch instruction that is furthest away. The value we shorten
-  // ThunkSectionSpacing by is set conservatively to allow us to create 16,384
+  // thunkSectionSpacing by is set conservatively to allow us to create 16,384
   // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to
   // one of the Thunks going out of range.
 
-  // On Arm the ThunkSectionSpacing depends on the range of the Thumb Branch
+  // On Arm the thunkSectionSpacing depends on the range of the Thumb Branch
   // range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except
   // ARMv6T2) the range is +/- 4MiB.
 
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index ddd9b9f7e5cd4..6f16ade571773 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -144,7 +144,7 @@ void RISCV::writePltHeader(uint8_t *buf) const {
   // 1: auipc t2, %pcrel_hi(.got.plt)
   // sub t1, t1, t3
   // l[wd] t3, %pcrel_lo(1b)(t2); t3 = _dl_runtime_resolve
-  // addi t1, t1, -PltHeaderSize-12; t1 = &.plt[i] - &.plt[0]
+  // addi t1, t1, -pltHeaderSize-12; t1 = &.plt[i] - &.plt[0]
   // addi t0, t2, %pcrel_lo(1b)
   // srli t1, t1, (rv64?1:2); t1 = &.got.plt[i] - &.got.plt[0]
   // l[wd] t0, Wordsize(t0); t0 = link_map
diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp
index c9a62f69accad..9aaadd4818336 100644
--- a/lld/ELF/CallGraphSort.cpp
+++ b/lld/ELF/CallGraphSort.cpp
@@ -177,7 +177,7 @@ void CallGraphSort::groupClusters() {
   });
 
   for (int si : sortedSecs) {
-    // Clusters[SI] is the same as SecToClusters[SI] here because it has not
+    // clusters[si] is the same as secToClusters[si] here because it has not
     // been merged into another cluster yet.
     Cluster &c = clusters[si];
 
@@ -233,8 +233,8 @@ DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
       return orderMap;
     }
 
-    // Print the symbols ordered by C3, in the order of increasing CurOrder
-    // Instead of sorting all the OrderMap, just repeat the loops above.
+    // Print the symbols ordered by C3, in the order of increasing curOrder
+    // Instead of sorting all the orderMap, just repeat the loops above.
     for (const Cluster &c : clusters)
       for (int secIndex : c.sections)
         // Search all the symbols in the file of the section
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 54991c7277829..ff9d3dc0933c0 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -249,7 +249,7 @@ struct Configuration {
   // True if the target is little-endian. False if big-endian.
   bool isLE;
 
-  // endianness::little if IsLE is true. endianness::big otherwise.
+  // endianness::little if isLE is true. endianness::big otherwise.
   llvm::support::endianness endianness;
 
   // True if the target is the little-endian MIPS64.
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 17c5860a0d785..98551d2cb34dc 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1255,7 +1255,7 @@ static uint64_t getCommonPageSize(opt::InputArgList &args) {
       warn("-z common-page-size set, but paging disabled by omagic or nmagic");
     return 1;
   }
-  // CommonPageSize can't be larger than MaxPageSize.
+  // commonPageSize can't be larger than maxPageSize.
   if (val > config->maxPageSize)
     val = config->maxPageSize;
   return val;
@@ -1263,7 +1263,7 @@ static uint64_t getCommonPageSize(opt::InputArgList &args) {
 
 // Parses -image-base option.
 static Optional<uint64_t> getImageBase(opt::InputArgList &args) {
-  // Because we are using "Config->MaxPageSize" here, this function has to be
+  // Because we are using "config->maxPageSize" here, this function has to be
   // called after the variable is initialized.
   auto *arg = args.getLastArg(OPT_image_base);
   if (!arg)
@@ -1406,8 +1406,8 @@ static void demoteSharedSymbols() {
   });
 }
 
-// The section referred to by S is considered address-significant. Set the
-// KeepUnique flag on the section if appropriate.
+// The section referred to by `s` is considered address-significant. Set the
+// keepUnique flag on the section if appropriate.
 static void markAddrsig(Symbol *s) {
   if (auto *d = dyn_cast_or_null<Defined>(s))
     if (d->section)
@@ -1772,7 +1772,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
   if (args.hasArg(OPT_exclude_libs))
     excludeLibs(args);
 
-  // Create ElfHeader early. We need a dummy section in
+  // Create elfHeader early. We need a dummy section in
   // addReservedSymbols to mark the created symbols as not absolute.
   Out::elfHeader = make<OutputSection>("", 0, SHF_ALLOC);
   Out::elfHeader->size = sizeof(typename ELFT::Ehdr);
@@ -1854,14 +1854,14 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
   target = getTarget();
 
   config->eflags = target->calcEFlags();
-  // MaxPageSize (sometimes called abi page size) is the maximum page size that
+  // maxPageSize (sometimes called abi page size) is the maximum page size that
   // the output can be run on. For example if the OS can use 4k or 64k page
-  // sizes then MaxPageSize must be 64 for the output to be useable on both.
+  // sizes then maxPageSize must be 64k for the output to be useable on both.
   // All important alignment decisions must use this value.
   config->maxPageSize = getMaxPageSize(args);
-  // CommonPageSize is the most common page size that the output will be run on.
+  // commonPageSize is the most common page size that the output will be run on.
   // For example if an OS can use 4k or 64k page sizes and 4k is more common
-  // than 64k then CommonPageSize is set to 4k. CommonPageSize can be used for
+  // than 64k then commonPageSize is set to 4k. commonPageSize can be used for
   // optimizations such as DATA_SEGMENT_ALIGN in linker scripts. LLD's use of it
   // is limited to writing trap instructions on the last executable segment.
   config->commonPageSize = getCommonPageSize(args);
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index fda2a544aed88..98b88283cf093 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -229,7 +229,7 @@ static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
           file.getVariableLoc(sym.getName()))
     return createFileLineMsg(fileLine->first, fileLine->second);
 
-  // File.SourceFile contains STT_FILE symbol, and that is a last resort.
+  // file.sourceFile contains STT_FILE symbol, and that is a last resort.
   return file.sourceFile;
 }
 
@@ -269,7 +269,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
       continue;
     lineTables.push_back(lt);
 
-    // Loop over variable records and insert them to VariableLoc.
+    // Loop over variable records and insert them to variableLoc.
     for (const auto &entry : cu->dies()) {
       DWARFDie die(cu.get(), &entry);
       // Skip all tags that are not variables.
@@ -290,7 +290,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeDwarf() {
       // Get the line number on which the variable is declared.
       unsigned line = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_line), 0);
 
-      // Here we want to take the variable name to add it into VariableLoc.
+      // Here we want to take the variable name to add it into variableLoc.
       // Variable can have regular and linkage name associated. At first, we try
       // to get linkage name as it can be different, for example when we have
       // two variables in different namespaces of the same object. Use common
@@ -450,7 +450,7 @@ template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getGlobalSymbols() {
 }
 
 template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
-  // Read a section table. JustSymbols is usually false.
+  // Read a section table. justSymbols is usually false.
   if (this->justSymbols)
     initializeJustSymbols();
   else
@@ -1178,7 +1178,7 @@ static std::vector<const void *> parseVerdefs(const uint8_t *base,
   // We cannot determine the largest verdef identifier without inspecting
   // every Elf_Verdef, but both bfd and gold assign verdef identifiers
   // sequentially starting from 1, so we predict that the largest identifier
-  // will be VerdefCount.
+  // will be verdefCount.
   unsigned verdefCount = sec->sh_info;
   std::vector<const void *> verdefs(verdefCount + 1);
 
@@ -1262,7 +1262,7 @@ template <class ELFT> void SharedFile::parse() {
     return;
   }
 
-  // Search for a DT_SONAME tag to initialize this->SoName.
+  // Search for a DT_SONAME tag to initialize this->soName.
   for (const Elf_Dyn &dyn : dynamicTags) {
     if (dyn.d_tag == DT_NEEDED) {
       uint64_t val = dyn.getVal();
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index 760f72fd198a4..5ccc3d402b376 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -117,7 +117,7 @@ class InputFile {
   // True if this is an argument for --just-symbols. Usually false.
   bool justSymbols = false;
 
-  // OutSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE
+  // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE
   // to compute offsets in PLT call stubs.
   uint32_t ppc32Got2OutSecOff = 0;
 
@@ -132,7 +132,7 @@ class InputFile {
   // [.got, .got + 0xFFFC].
   bool ppc64SmallCodeModelTocRelocs = false;
 
-  // GroupId is used for --warn-backrefs which is an optional error
+  // groupId is used for --warn-backrefs which is an optional error
   // checking feature. All files within the same --{start,end}-group or
   // --{start,end}-lib get the same group ID. Otherwise, each file gets a new
   // group ID. For more info, see checkDependency() in SymbolTable.cpp.
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 1ca520c3740ef..a024ac307b0a9 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -206,9 +206,9 @@ OutputSection *SectionBase::getOutputSection() {
   return sec ? sec->getParent() : nullptr;
 }
 
-// When a section is compressed, `RawData` consists with a header followed
+// When a section is compressed, `rawData` consists of a header followed
 // by zlib-compressed data. This function parses a header to initialize
-// `UncompressedSize` member and remove the header from `RawData`.
+// `uncompressedSize` member and remove the header from `rawData`.
 void InputSectionBase::parseCompressedHeader() {
   using Chdr64 = typename ELF64LE::Chdr;
   using Chdr32 = typename ELF32LE::Chdr;
@@ -306,7 +306,7 @@ std::string InputSectionBase::getLocation(uint64_t offset) {
     return info->FileName + ":" + std::to_string(info->Line) + ":(" +
            secAndOffset + ")";
 
-  // File->SourceFile contains STT_FILE symbol that contains a
+  // File->sourceFile contains STT_FILE symbol that contains a
   // source file name. If it's missing, we use an object file name.
   std::string srcFile = getFile<ELFT>()->sourceFile;
   if (srcFile.empty())
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index dcd4848a0ab1e..3a974074e0e51 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -219,8 +219,8 @@ class InputSectionBase : public SectionBase {
 
   mutable ArrayRef<uint8_t> rawData;
 
-  // This field stores the uncompressed size of the compressed data in RawData,
-  // or -1 if RawData is not compressed (either because the section wasn't
+  // This field stores the uncompressed size of the compressed data in rawData,
+  // or -1 if rawData is not compressed (either because the section wasn't
   // compressed in the first place, or because we ended up uncompressing it).
   // Since the feature is not used often, this is usually -1.
   mutable int64_t uncompressedSize = -1;
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 7230cc51044e4..28d4bfe77c5d4 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -124,11 +124,11 @@ static lto::Config createConfig() {
 }
 
 BitcodeCompiler::BitcodeCompiler() {
-  // Initialize IndexFile.
+  // Initialize indexFile.
   if (!config->thinLTOIndexOnlyArg.empty())
     indexFile = openFile(config->thinLTOIndexOnlyArg);
 
-  // Initialize LTOObj.
+  // Initialize ltoObj.
   lto::ThinBackend backend;
   if (config->thinLTOIndexOnly) {
     auto onIndexWrite = [&](StringRef s) { thinIndices.erase(s); };
@@ -142,7 +142,7 @@ BitcodeCompiler::BitcodeCompiler() {
   ltoObj = llvm::make_unique<lto::LTO>(createConfig(), backend,
                                        config->ltoPartitions);
 
-  // Initialize UsedStartStop.
+  // Initialize usedStartStop.
   symtab->forEachSymbol([&](Symbol *sym) {
     StringRef s = sym->getName();
     for (StringRef prefix : {"__start_", "__stop_"})
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index ce009149710fe..49e44d7804761 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -115,7 +115,7 @@ void LinkerScript::expandMemoryRegions(uint64_t size) {
   if (ctx->memRegion)
     expandMemoryRegion(ctx->memRegion, size, ctx->memRegion->name,
                        ctx->outSec->name);
+  // Only expand the lmaRegion if it is different from memRegion.
+  // Only expand the LMARegion if it is different from memRegion.
   if (ctx->lmaRegion && ctx->memRegion != ctx->lmaRegion)
     expandMemoryRegion(ctx->lmaRegion, size, ctx->lmaRegion->name,
                        ctx->outSec->name);
@@ -1035,8 +1035,8 @@ static uint64_t getInitialDot() {
     return config->imageBase ? *config->imageBase : 0;
 
   uint64_t startAddr = UINT64_MAX;
-  // The Sections with -T<section> have been sorted in order of ascending
-  // address. We must lower StartAddr if the lowest -T<section address> as
+  // The sections with -T<section> have been sorted in order of ascending
+  // address. We must lower startAddr to the lowest -T<section address> as
   // calls to setDot() must be monotonically increasing.
   for (auto &kv : config->sectionStartMap)
     startAddr = std::min(startAddr, kv.second);
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index cd71961804b56..ee48f48081360 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -829,7 +829,7 @@ class OffsetGetter {
 
   // Translates offsets in input sections to offsets in output sections.
   // Given offset must increase monotonically. We assume that Piece is
-  // sorted by InputOff.
+  // sorted by inputOff.
   uint64_t get(uint64_t off) {
     if (pieces.empty())
       return off;
@@ -859,10 +859,10 @@ static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec,
                              RelType type) {
   Partition &part = isec->getPartition();
 
-  // Add a relative relocation. If RelrDyn section is enabled, and the
+  // Add a relative relocation. If relrDyn section is enabled, and the
   // relocation offset is guaranteed to be even, add the relocation to
-  // the RelrDyn section, otherwise add it to the RelaDyn section.
-  // RelrDyn sections don't support odd offsets. Also, RelrDyn sections
+  // the relrDyn section, otherwise add it to the relaDyn section.
+  // relrDyn sections don't support odd offsets. Also, relrDyn sections
   // don't store the addend values, so we must write it to the relocated
   // address.
   if (part.relrDyn && isec->alignment >= 2 && offsetInSec % 2 == 0) {
@@ -922,7 +922,7 @@ static bool canDefineSymbolInExecutable(Symbol &sym) {
   // executable will preempt it.
   // Note that we want the visibility of the shared symbol itself, not
   // the visibility of the symbol in the output file we are producing. That is
-  // why we use Sym.StOther.
+  // why we use sym.stOther.
   if ((sym.stOther & 0x3) == STV_DEFAULT)
     return true;
 
@@ -1010,7 +1010,7 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type,
   // Copy relocations (for STT_OBJECT) and canonical PLT (for STT_FUNC) are only
   // possible in an executable.
   //
-  // Among R_ABS relocatoin types, SymbolicRel has the same size as the word
+  // Among R_ABS relocation types, symbolicRel has the same size as the word
   // size. Others have fewer bits and may cause runtime overflow in -pie/-shared
   // mode. Disallow them.
   if (config->shared ||
@@ -1237,8 +1237,8 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
     //   GOT-generating or PLT-generating, the handling of an ifunc is
     //   relatively straightforward. We create a PLT entry in Iplt, which is
     //   usually at the end of .plt, which makes an indirect call using a
-    //   matching GOT entry in IgotPlt, which is usually at the end of .got.plt.
-    //   The GOT entry is relocated using an IRELATIVE relocation in RelaIplt,
+    //   matching GOT entry in igotPlt, which is usually at the end of .got.plt.
+    //   The GOT entry is relocated using an IRELATIVE relocation in relaIplt,
     //   which is usually at the end of .rela.plt. Unlike most relocations in
     //   .rela.plt, which may be evaluated lazily without -z now, dynamic
     //   loaders evaluate IRELATIVE relocs eagerly, which means that for
@@ -1274,13 +1274,13 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
     //   variable containing a pointer to the ifunc) needs to be relocated in
     //   the exact same way as a GOT entry, so we can avoid needing to make the
     //   PLT entry canonical by translating such relocations into IRELATIVE
-    //   relocations in the RelaIplt.
+    //   relocations in the relaIplt.
     if (!sym.isInPlt()) {
       // Create PLT and GOTPLT slots for the symbol.
       sym.isInIplt = true;
 
       // Create a copy of the symbol to use as the target of the IRELATIVE
-      // relocation in the IgotPlt. This is in case we make the PLT canonical
+      // relocation in the igotPlt. This is in case we make the PLT canonical
       // later, which would overwrite the original symbol.
       //
       // FIXME: Creating a copy of the symbol here is a bit of a hack. All
@@ -1526,7 +1526,7 @@ void ThunkCreator::mergeThunks(ArrayRef<OutputSection *> outputSections) {
 
         // ISD->ThunkSections contains all created ThunkSections, including
         // those inserted in previous passes. Extract the Thunks created this
-        // pass and order them in ascending OutSecOff.
+        // pass and order them in ascending outSecOff.
         std::vector<ThunkSection *> newThunks;
         for (const std::pair<ThunkSection *, uint32_t> ts : isd->thunkSections)
           if (ts.second == pass)
@@ -1536,7 +1536,7 @@ void ThunkCreator::mergeThunks(ArrayRef<OutputSection *> outputSections) {
                             return a->outSecOff < b->outSecOff;
                           });
 
-        // Merge sorted vectors of Thunks and InputSections by OutSecOff
+        // Merge sorted vectors of Thunks and InputSections by outSecOff
         std::vector<InputSection *> tmp;
         tmp.reserve(isd->sections.size() + newThunks.size());
 
diff --git a/lld/ELF/ScriptParser.h b/lld/ELF/ScriptParser.h
index 110684761c76d..c953fb302b9a7 100644
--- a/lld/ELF/ScriptParser.h
+++ b/lld/ELF/ScriptParser.h
@@ -16,7 +16,7 @@ namespace lld {
 namespace elf {
 
 // Parses a linker script. Calling this function updates
-// Config and ScriptConfig.
+// lld::elf::config and lld::elf::script.
 void readLinkerScript(MemoryBufferRef mb);
 
 // Parses a version script.
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index fde08064a606d..3faeed8c2bdc3 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -99,7 +99,7 @@ Symbol *SymbolTable::find(StringRef name) {
   return sym;
 }
 
-// Initialize DemangledSyms with a map from demangled symbols to symbol
+// Initialize demangledSyms with a map from demangled symbols to symbol
 // objects. Used to handle "extern C++" directive in version scripts.
 //
 // The map will contain all demangled symbols. That can be very large,
@@ -225,7 +225,7 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
       b->versionId = versionId;
 }
 
-// This function processes version scripts by updating VersionId
+// This function processes version scripts by updating the versionId
 // member of symbols.
 // If there's only one anonymous version definition in a version
 // script file, the script does not actually define any symbol version,
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 918849bd6ab22..f6d0f190d84d0 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -579,11 +579,9 @@ void EhFrameSection::writeTo(uint8_t *buf) {
 GotSection::GotSection()
     : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, config->wordsize,
                        ".got") {
-  // PPC64 saves the ElfSym::GlobalOffsetTable .TOC. as the first entry in the
-  // .got. If there are no references to .TOC. in the symbol table,
-  // ElfSym::GlobalOffsetTable will not be defined and we won't need to save
-  // .TOC. in the .got. When it is defined, we increase NumEntries by the number
-  // of entries used to emit ElfSym::GlobalOffsetTable.
+  // If ElfSym::globalOffsetTable is relative to .got and is referenced,
+  // increase numEntries by the number of entries used to emit
+  // ElfSym::globalOffsetTable.
   if (ElfSym::globalOffsetTable && !target->gotBaseSymInGotPlt)
     numEntries += target->gotHeaderEntriesNum;
 }
@@ -861,9 +859,9 @@ void MipsGotSection::build() {
     } else {
       // If this is the first time we failed to merge with the primary GOT,
       // MergedGots.back() will also be the primary GOT. We must make sure not
-      // to try to merge again with IsPrimary=false, as otherwise, if the
+      // to try to merge again with isPrimary=false, as otherwise, if the
       // inputs are just right, we could allow the primary GOT to become 1 or 2
-      // words too big due to ignoring the header size.
+      // words bigger due to ignoring the header size.
       if (mergedGots.size() == 1 ||
           !tryMergeGots(mergedGots.back(), srcGot, false)) {
         mergedGots.emplace_back();
@@ -888,7 +886,7 @@ void MipsGotSection::build() {
     for (std::pair<const OutputSection *, FileGot::PageBlock> &p :
          got.pagesMap) {
       // For each output section referenced by GOT page relocations calculate
-      // and save into PagesMap an upper bound of MIPS GOT entries required
+      // and save into pagesMap an upper bound of MIPS GOT entries required
       // to store page addresses of local symbols. We assume the worst case -
       // each 64kb page of the output section has at least one GOT relocation
       // against it. And take in account the case when the section intersects
@@ -910,7 +908,7 @@ void MipsGotSection::build() {
     }
   }
 
-  // Update Symbol::GotIndex field to use this
+  // Update Symbol::gotIndex field to use this
   // value later in the `sortMipsSymbols` function.
   for (auto &p : primGot->global)
     p.first->gotIndex = p.second;
@@ -936,7 +934,7 @@ void MipsGotSection::build() {
       } else {
         // When building a shared library we still need a dynamic relocation
         // for the module index. Therefore only checking for
-        // S->IsPreemptible is not sufficient (this happens e.g. for
+        // S->isPreemptible is not sufficient (this happens e.g. for
         // thread-locals that have been marked as local through a linker script)
         if (!s->isPreemptible && !config->isPic)
           continue;
@@ -1140,7 +1138,7 @@ StringTableSection::StringTableSection(StringRef name, bool dynamic)
   addString("");
 }
 
-// Adds a string to the string table. If HashIt is true we hash and check for
+// Adds a string to the string table. If `hashIt` is true we hash and check for
 // duplicates. It is optional because the name of global symbols are already
 // uniqued and hashing them again has a big cost for a small value: uniquing
 // them with some other string that happens to be the same.
@@ -1335,9 +1333,9 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
   }
   // .rel[a].plt section usually consists of two parts, containing plt and
   // iplt relocations. It is possible to have only iplt relocations in the
-  // output. In that case RelaPlt is empty and have zero offset, the same offset
-  // as RelaIplt have. And we still want to emit proper dynamic tags for that
-  // case, so here we always use RelaPlt as marker for the begining of
+  // output. In that case relaPlt is empty and has zero offset, the same offset
+  // as relaIplt has. And we still want to emit proper dynamic tags for that
+  // case, so here we always use relaPlt as marker for the beginning of
   // .rel[a].plt section.
   if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) {
     addInSec(DT_JMPREL, in.relaPlt);
@@ -2365,7 +2363,7 @@ void PltSection::writeTo(uint8_t *buf) {
 
   RelocationBaseSection *relSec = isIplt ? in.relaIplt : in.relaPlt;
 
-  // The IPlt is immediately after the Plt, account for this in RelOff
+  // The IPlt is immediately after the Plt, account for this in relOff
   size_t pltOff = isIplt ? in.plt->getSize() : 0;
 
   for (size_t i = 0, e = entries.size(); i != e; ++i) {
@@ -2491,9 +2489,9 @@ readPubNamesAndTypes(const LLDDwarfObj<ELFT> &obj,
   for (const DWARFSection *pub : {&pubNames, &pubTypes}) {
     DWARFDebugPubTable table(obj, *pub, config->isLE, true);
     for (const DWARFDebugPubTable::Set &set : table.getData()) {
-      // The value written into the constant pool is Kind << 24 | CuIndex. As we
+      // The value written into the constant pool is kind << 24 | cuIndex. As we
       // don't know how many compilation units precede this object to compute
-      // CuIndex, we compute (Kind << 24 | CuIndexInThisObject) instead, and add
+      // cuIndex, we compute (kind << 24 | cuIndexInThisObject) instead, and add
       // the number of preceding compilation units later.
       uint32_t i =
           lower_bound(cUs, set.Offset,
@@ -2945,7 +2943,7 @@ void MergeTailSection::finalizeContents() {
 
   // finalize() fixed tail-optimized strings, so we can now get
   // offsets of strings. Get an offset for each string and save it
-  // to a corresponding StringPiece for easy access.
+  // to a corresponding SectionPiece for easy access.
   for (MergeInputSection *sec : sections)
     for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
       if (sec->pieces[i].live)
@@ -3330,7 +3328,7 @@ bool PPC32Got2Section::isNeeded() const {
 
 void PPC32Got2Section::finalizeContents() {
   // PPC32 may create multiple GOT sections for -fPIC/-fPIE, one per file in
-  // .got2 . This function computes OutSecOff of each .got2 to be used in
+  // .got2 . This function computes outSecOff of each .got2 to be used in
   // PPC32PltCallStub::writeTo(). The purpose of this empty synthetic section is
   // to collect input sections named ".got2".
   uint32_t offset = 0;
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index ab2995554c478..1c4dd06e02776 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -434,8 +434,8 @@ class DynamicReloc {
   uint32_t getSymIndex(SymbolTableBaseSection *symTab) const;
 
   // Computes the addend of the dynamic relocation. Note that this is not the
-  // same as the Addend member variable as it also includes the symbol address
-  // if UseSymVA is true.
+  // same as the addend member variable as it also includes the symbol address
+  // if useSymVA is true.
   int64_t computeAddend() const;
 
   RelType type;
@@ -1026,7 +1026,7 @@ class ARMExidxSyntheticSection : public SyntheticSection {
 // thunks including ARM interworking and Mips LA25 PI to non-PI thunks.
 class ThunkSection : public SyntheticSection {
 public:
-  // ThunkSection in OS, with desired OutSecOff of Off
+  // ThunkSection in os, with desired outSecOff of off
   ThunkSection(OutputSection *os, uint64_t off);
 
   // Add a newly created Thunk to this container:
@@ -1044,7 +1044,7 @@ class ThunkSection : public SyntheticSection {
   size_t size = 0;
 };
 
-// Used to compute OutSecOff of .got2 in each object file. This is needed to
+// Used to compute outSecOff of .got2 in each object file. This is needed to
 // synthesize PLT entries for PPC32 Secure PLT ABI.
 class PPC32Got2Section final : public SyntheticSection {
 public:
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index 00c93a9c2e6b3..effa6001f6d96 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -73,7 +73,7 @@ class TargetInfo {
   virtual bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                                 uint8_t stOther) const;
 
-  // Return true if we can reach Dst from Src with Relocation RelocType
+  // Return true if we can reach dst from src with RelType type.
   virtual bool inBranchRange(RelType type, uint64_t src,
                              uint64_t dst) const;
 
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 32e935126e790..73208f932031d 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -388,7 +388,7 @@ static uint64_t getARMThunkDestVA(const Symbol &s) {
 }
 
 // This function returns true if the target is not Thumb and is within 2^26, and
-// it has not previously returned false (see comment for MayUseShortThunk).
+// it has not previously returned false (see comment for mayUseShortThunk).
 bool ARMThunk::getMayUseShortThunk() {
   if (!mayUseShortThunk)
     return false;
@@ -426,7 +426,7 @@ bool ARMThunk::isCompatibleWith(const InputSection &isec,
 }
 
 // This function returns true if the target is Thumb and is within 2^25, and
-// it has not previously returned false (see comment for MayUseShortThunk).
+// it has not previously returned false (see comment for mayUseShortThunk).
 bool ThumbThunk::getMayUseShortThunk() {
   if (!mayUseShortThunk)
     return false;
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index d1dc8ef7606c4..3cf7b056064f3 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -504,7 +504,7 @@ template <class ELFT> static void createSyntheticSections() {
       config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false);
   add(in.relaPlt);
 
-  // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure
+  // The relaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure
   // that the IRelative relocations are processed last by the dynamic loader.
   // We cannot place the iplt section in .rel.dyn when Android relocation
   // packing is enabled because that would cause a section type mismatch.
@@ -1023,7 +1023,7 @@ template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() {
 
   // By default, __rela_iplt_{start,end} belong to a dummy section 0
   // because .rela.plt might be empty and thus removed from output.
-  // We'll override Out::ElfHeader with In.RelaIplt later when we are
+  // We'll override Out::elfHeader with In.relaIplt later when we are
   // sure that .rela.plt exists in output.
   ElfSym::relaIpltStart = addOptionalRegular(
       config->isRela ? "__rela_iplt_start" : "__rel_iplt_start",
@@ -1424,7 +1424,7 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
       continue;
     os->sortRank = getSectionRank(os);
 
-    // We want to assign rude approximation values to OutSecOff fields
+    // We want to assign rude approximation values to outSecOff fields
     // to know the relative order of the input sections. We use it for
     // sorting SHF_LINK_ORDER sections. See resolveShfLinkOrder().
     uint64_t i = 0;
@@ -1884,7 +1884,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
   finalizeSynthetic(in.partIndex);
 
   // Dynamic section must be the last one in this list and dynamic
-  // symbol table section (DynSymTab) must be the first one.
+  // symbol table section (dynSymTab) must be the first one.
   for (Partition &part : partitions) {
     finalizeSynthetic(part.armExidx);
     finalizeSynthetic(part.dynSymTab);
diff --git a/lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s b/lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s
index 80c738eab1ff3..997eb6be3e523 100644
--- a/lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s
+++ b/lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s
@@ -31,7 +31,7 @@ _start:
 	.byte	0
 .Lcu_end0:
 
-# .debug_gnu_pubnames has just one set, associated with .Lcu_begin1 (CuIndex: 1)
+# .debug_gnu_pubnames has just one set, associated with .Lcu_begin1 (cuIndex: 1)
 .section .debug_gnu_pubnames,"",@progbits
 	.long	.LpubNames_end0 - .LpubNames_begin0
 .LpubNames_begin0:
diff --git a/lld/test/ELF/gdb-index-multiple-cu-2.s b/lld/test/ELF/gdb-index-multiple-cu-2.s
index 9cf2a0c826934..06316860871da 100644
--- a/lld/test/ELF/gdb-index-multiple-cu-2.s
+++ b/lld/test/ELF/gdb-index-multiple-cu-2.s
@@ -4,8 +4,8 @@
 # RUN: ld.lld --gdb-index %t.o %t1.o -o %t
 # RUN: llvm-dwarfdump -gdb-index %t | FileCheck %s
 
-# %t.o has 2 CUs while %t1 has 1, thus _start in %t1.o should have CuIndex 2.
-# Attributes << 24 | CuIndex = 48 << 24 | 2 = 0x30000002
+# %t.o has 2 CUs while %t1 has 1, thus _start in %t1.o should have cuIndex 2.
+# attributes << 24 | cuIndex = 48 << 24 | 2 = 0x30000002
 # CHECK:      Constant pool
 # CHECK-NEXT:   0(0x0): 0x30000002
 
diff --git a/lld/test/ELF/gdb-index-multiple-cu.s b/lld/test/ELF/gdb-index-multiple-cu.s
index 9a8c2eae78d4a..8702d9f3924db 100644
--- a/lld/test/ELF/gdb-index-multiple-cu.s
+++ b/lld/test/ELF/gdb-index-multiple-cu.s
@@ -3,10 +3,10 @@
 # RUN: ld.lld --gdb-index %t.o -o %t
 # RUN: llvm-dwarfdump -gdb-index %t | FileCheck %s
 
-# CuIndexAndAttrs of _start:
-#   Attributes << 24 | CuIndex = 48 << 24 | 0 = 0x30000000
-# CuIndexAndAttrs of foo:
-#   Attributes << 24 | CuIndex = 48 << 24 | 1 = 0x30000001
+# cuIndexAndAttrs of _start:
+#   attributes << 24 | cuIndex = 48 << 24 | 0 = 0x30000000
+# cuIndexAndAttrs of foo:
+#   attributes << 24 | cuIndex = 48 << 24 | 1 = 0x30000001
 # CHECK:      Symbol table
 # CHECK-DAG:      String name: _start, CU vector index: 0
 # CHECK-DAG:      String name: foo, CU vector index: 1
@@ -63,7 +63,7 @@ foo:
 # Swap sets to test the case where pubnames are in a
 # different order than the CUs they refer to.
 .section .debug_gnu_pubnames,"",@progbits
-	# CuIndex: 1
+	# cuIndex: 1
 	.long	.LpubNames_end1 - .LpubNames_begin1
 .LpubNames_begin1:
 	.short	2              # Version
@@ -75,7 +75,7 @@ foo:
 	.long	0
 .LpubNames_end1:
 
-	# CuIndex: 0
+	# cuIndex: 0
 	.long	.LpubNames_end0 - .LpubNames_begin0
 .LpubNames_begin0:
 	.short	2              # Version

From c0b2ed664bc0c793051eb95e89d51c02aa5871f7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 16 Jul 2019 05:52:27 +0000
Subject: [PATCH 211/451] [X86] In combineStore, don't convert v2f32 load/store
 pairs to f64 loads/stores.

Type legalization can take care of this. This gives DAG combine
a little more time with the original types.

llvm-svn: 366182
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 23926ca80527d..62499a28dff85 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40093,7 +40093,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
   bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
   bool F64IsLegal =
       !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
-  if ((VT.isVector() ||
+  if (((VT.isVector() && !VT.isFloatingPoint()) ||
        (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) &&
       isa<LoadSDNode>(St->getValue()) &&
       !cast<LoadSDNode>(St->getValue())->isVolatile() &&
@@ -40116,8 +40116,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
     // pair instead.
     if (Subtarget.is64Bit() || F64IsLegal) {
-      MVT LdVT = (Subtarget.is64Bit() &&
-                  (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64;
+      MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
       SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
                                   Ld->getMemOperand());
 

From 3e10905c49ffda36d7a90e24be1ab30832fa4afc Mon Sep 17 00:00:00 2001
From: Puyan Lotfi <puyan@puyan.org>
Date: Tue, 16 Jul 2019 05:58:03 +0000
Subject: [PATCH 212/451] [NFC][test] Fix for riscv tests.

The following tests need updating for: https://reviews.llvm.org/D55277

llvm-svn: 366183
---
 lld/test/ELF/riscv-call.s   |  8 ++++----
 lld/test/ELF/riscv-plt.s    | 12 ++++++------
 lld/test/ELF/riscv-tls-gd.s |  8 ++++----
 lld/test/ELF/riscv-tls-ld.s |  4 ++--
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/lld/test/ELF/riscv-call.s b/lld/test/ELF/riscv-call.s
index d5b9b370e807c..a9a73841fd746 100644
--- a/lld/test/ELF/riscv-call.s
+++ b/lld/test/ELF/riscv-call.s
@@ -8,18 +8,18 @@
 # RUN: llvm-objdump -d %t.rv32 | FileCheck %s
 # RUN: llvm-objdump -d %t.rv64 | FileCheck %s
 # CHECK:      97 00 00 00     auipc   ra, 0
-# CHECK-NEXT: e7 80 80 00     jalr    ra, ra, 8
+# CHECK-NEXT: e7 80 80 00     jalr    8(ra)
 # CHECK:      97 00 00 00     auipc   ra, 0
-# CHECK-NEXT: e7 80 80 ff     jalr    ra, ra, -8
+# CHECK-NEXT: e7 80 80 ff     jalr    -8(ra)
 
 # RUN: ld.lld %t.rv32.o --defsym foo=_start+0x7ffff7ff --defsym bar=_start+8-0x80000800 -o %t.rv32.limits
 # RUN: ld.lld %t.rv64.o --defsym foo=_start+0x7ffff7ff --defsym bar=_start+8-0x80000800 -o %t.rv64.limits
 # RUN: llvm-objdump -d %t.rv32.limits | FileCheck --check-prefix=LIMITS %s
 # RUN: llvm-objdump -d %t.rv64.limits | FileCheck --check-prefix=LIMITS %s
 # LIMITS:      97 f0 ff 7f     auipc   ra, 524287
-# LIMITS-NEXT: e7 80 f0 7f     jalr    ra, ra, 2047
+# LIMITS-NEXT: e7 80 f0 7f     jalr    2047(ra)
 # LIMITS-NEXT: 97 00 00 80     auipc   ra, 524288
-# LIMITS-NEXT: e7 80 00 80     jalr    ra, ra, -2048
+# LIMITS-NEXT: e7 80 00 80     jalr    -2048(ra)
 
 # RUN: ld.lld %t.rv32.o --defsym foo=_start+0x7ffff800 --defsym bar=_start+8-0x80000801 -o %t
 # RUN: not ld.lld %t.rv64.o --defsym foo=_start+0x7ffff800 --defsym bar=_start+8-0x80000801 -o %t 2>&1 | FileCheck --check-prefix=ERROR %s
diff --git a/lld/test/ELF/riscv-plt.s b/lld/test/ELF/riscv-plt.s
index 2a199f20cf509..0afd5b0057fad 100644
--- a/lld/test/ELF/riscv-plt.s
+++ b/lld/test/ELF/riscv-plt.s
@@ -47,16 +47,16 @@
 ## Direct call
 ## foo - . = 0x11020-0x11000 = 32
 # DIS-NEXT:          auipc ra, 0
-# DIS-NEXT:   11004: jalr ra, ra, 32
+# DIS-NEXT:   11004: jalr 32(ra)
 ## bar@plt - . = 0x11050-0x1100c = 72
 # DIS-NEXT:          auipc ra, 0
-# DIS-NEXT:   1100c: jalr ra, ra, 72
+# DIS-NEXT:   1100c: jalr 72(ra)
 ## bar@plt - . = 0x11050-0x11014 = 64
 # DIS-NEXT:          auipc ra, 0
-# DIS-NEXT:   11014: jalr ra, ra, 64
+# DIS-NEXT:   11014: jalr 64(ra)
 ## weak@plt - . = 0x11060-0x1101c = 72
 # DIS-NEXT:          auipc ra, 0
-# DIS-NEXT:   1101c: jalr ra, ra, 72
+# DIS-NEXT:   1101c: jalr 72(ra)
 # DIS:      foo:
 # DIS-NEXT:   11020:
 
@@ -79,14 +79,14 @@
 # DIS:        11050: auipc t3, 2
 # DIS32-NEXT:   lw t3, -72(t3)
 # DIS64-NEXT:   ld t3, -64(t3)
-# DIS-NEXT:     jalr t1, t3, 0
+# DIS-NEXT:     jalr t1, t3
 # DIS-NEXT:     nop
 
 ## 32-bit: &.got.plt[weak]-. = 0x1300c-0x11060 = 4096*2-84
 # DIS:        11060: auipc t3, 2
 # DIS32-NEXT:   lw t3, -84(t3)
 # DIS64-NEXT:   ld t3, -72(t3)
-# DIS-NEXT:     jalr t1, t3, 0
+# DIS-NEXT:     jalr t1, t3
 # DIS-NEXT:     nop
 
 .global _start, foo, bar
diff --git a/lld/test/ELF/riscv-tls-gd.s b/lld/test/ELF/riscv-tls-gd.s
index 21a8695405692..3f5735aabf2de 100644
--- a/lld/test/ELF/riscv-tls-gd.s
+++ b/lld/test/ELF/riscv-tls-gd.s
@@ -56,13 +56,13 @@
 # GD32:      1000: auipc a0, 1
 # GD32-NEXT:       addi a0, a0, 112
 # GD32-NEXT:       auipc ra, 0
-# GD32-NEXT:       jalr ra, ra, 56
+# GD32-NEXT:       jalr 56(ra)
 
 ## &DTPMOD(b) - . = 0x2078 - 0x1010 = 4096*1+104
 # GD32:      1010: auipc a0, 1
 # GD32-NEXT:       addi a0, a0, 104
 # GD32-NEXT:       auipc ra, 0
-# GD32-NEXT:       jalr ra, ra, 40
+# GD32-NEXT:       jalr 40(ra)
 
 # GD64-REL:      .rela.dyn {
 # GD64-REL-NEXT:   0x20E0 R_RISCV_TLS_DTPMOD64 a 0x0
@@ -75,13 +75,13 @@
 # GD64:      1000: auipc a0, 1
 # GD64-NEXT:       addi a0, a0, 224
 # GD64-NEXT:       auipc ra, 0
-# GD64-NEXT:       jalr ra, ra, 56
+# GD64-NEXT:       jalr 56(ra)
 
 ## &DTPMOD(b) - . = 0x20f0 - 0x1010 = 4096*1+224
 # GD64:      1010: auipc a0, 1
 # GD64-NEXT:       addi a0, a0, 224
 # GD64-NEXT:       auipc ra, 0
-# GD64-NEXT:       jalr ra, ra, 40
+# GD64-NEXT:       jalr 40(ra)
 
 # NOREL: no relocations
 
diff --git a/lld/test/ELF/riscv-tls-ld.s b/lld/test/ELF/riscv-tls-ld.s
index a2a6768899088..6563cf874634d 100644
--- a/lld/test/ELF/riscv-tls-ld.s
+++ b/lld/test/ELF/riscv-tls-ld.s
@@ -55,7 +55,7 @@
 # LD32-NEXT:       addi a0, a0, 124
 # LD64-NEXT:       addi a0, a0, 248
 # LD-NEXT:         auipc ra, 0
-# LD-NEXT:         jalr ra, ra, 56
+# LD-NEXT:         jalr 56(ra)
 
 # NOREL: no relocations
 
@@ -74,7 +74,7 @@
 # LE32-NEXT:        addi a0, a0, 4
 # LE64-NEXT:        addi a0, a0, 8
 # LE-NEXT:          auipc ra, 0
-# LE-NEXT:          jalr ra, ra, 24
+# LE-NEXT:          jalr 24(ra)
 
 la.tls.gd a0, .LANCHOR0
 call __tls_get_addr@plt

From e215996a2932ed7c472f4e94dc4345b30fd0c373 Mon Sep 17 00:00:00 2001
From: Stephan Bergmann <sbergman@redhat.com>
Date: Tue, 16 Jul 2019 06:23:27 +0000
Subject: [PATCH 213/451] Finish "Adapt -fsanitize=function to
 SANITIZER_NON_UNIQUE_TYPEINFO"

i.e., recent 5745eccef54ddd3caca278d1d292a88b2281528b:

* Bump the function_type_mismatch handler version, as its signature has changed.

* The function_type_mismatch handler can return successfully now, so
  SanitizerKind::Function must be AlwaysRecoverable (like for
  SanitizerKind::Vptr).

* But the minimal runtime would still unconditionally treat a call to the
  function_type_mismatch handler as failure, so disallow -fsanitize=function in
  combination with -fsanitize-minimal-runtime (like it was already done for
  -fsanitize=vptr).

* Add tests.

Differential Revision: https://reviews.llvm.org/D61479

llvm-svn: 366186
---
 clang/docs/UndefinedBehaviorSanitizer.rst     |  4 +-
 clang/lib/CodeGen/CGExpr.cpp                  |  2 +-
 clang/lib/CodeGen/CodeGenFunction.h           |  2 +-
 clang/lib/Driver/SanitizerArgs.cpp            |  3 +-
 clang/test/CodeGen/ubsan-function.cpp         | 22 ++++++
 clang/test/Driver/fsanitize.c                 |  5 +-
 compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc   | 15 ++---
 compiler-rt/lib/ubsan/ubsan_handlers_cxx.h    | 15 +++--
 compiler-rt/lib/ubsan/ubsan_interface.inc     |  4 +-
 .../TestCases/TypeCheck/Function/function.cpp | 67 ++++++++++++++++++-
 10 files changed, 113 insertions(+), 26 deletions(-)
 create mode 100644 clang/test/CodeGen/ubsan-function.cpp

diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst
index 7a4eaf4f60d5d..38cd3645bceac 100644
--- a/clang/docs/UndefinedBehaviorSanitizer.rst
+++ b/clang/docs/UndefinedBehaviorSanitizer.rst
@@ -205,8 +205,8 @@ Minimal Runtime
 
 There is a minimal UBSan runtime available suitable for use in production
 environments. This runtime has a small attack surface. It only provides very
-basic issue logging and deduplication, and does not support ``-fsanitize=vptr``
-checking.
+basic issue logging and deduplication, and does not support
+``-fsanitize=function`` and ``-fsanitize=vptr`` checking.
 
 To use the minimal runtime, add ``-fsanitize-minimal-runtime`` to the clang
 command line options. For example, if you're used to compiling with
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 8340f48abcb64..4d19a12e5cb05 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2927,7 +2927,7 @@ enum class CheckRecoverableKind {
 
 static CheckRecoverableKind getRecoverableKind(SanitizerMask Kind) {
   assert(Kind.countPopulation() == 1);
-  if (Kind == SanitizerKind::Vptr)
+  if (Kind == SanitizerKind::Function || Kind == SanitizerKind::Vptr)
     return CheckRecoverableKind::AlwaysRecoverable;
   else if (Kind == SanitizerKind::Return || Kind == SanitizerKind::Unreachable)
     return CheckRecoverableKind::Unrecoverable;
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index a51a9711ff170..bd9e14206a09e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -114,7 +114,7 @@ enum TypeEvaluationKind {
   SANITIZER_CHECK(DivremOverflow, divrem_overflow, 0)                          \
   SANITIZER_CHECK(DynamicTypeCacheMiss, dynamic_type_cache_miss, 0)            \
   SANITIZER_CHECK(FloatCastOverflow, float_cast_overflow, 0)                   \
-  SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 0)             \
+  SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1)             \
   SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0)                  \
   SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0)                          \
   SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0)                     \
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 9132faa917646..6b6a9feec42c5 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -31,7 +31,8 @@ static const SanitizerMask NeedsUbsanRt =
 static const SanitizerMask NeedsUbsanCxxRt =
     SanitizerKind::Vptr | SanitizerKind::CFI;
 static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr;
-static const SanitizerMask NotAllowedWithMinimalRuntime = SanitizerKind::Vptr;
+static const SanitizerMask NotAllowedWithMinimalRuntime =
+    SanitizerKind::Function | SanitizerKind::Vptr;
 static const SanitizerMask RequiresPIE =
     SanitizerKind::DataFlow | SanitizerKind::HWAddress | SanitizerKind::Scudo;
 static const SanitizerMask NeedsUnwindTables =
diff --git a/clang/test/CodeGen/ubsan-function.cpp b/clang/test/CodeGen/ubsan-function.cpp
new file mode 100644
index 0000000000000..749e6214242cf
--- /dev/null
+++ b/clang/test/CodeGen/ubsan-function.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s
+
+// CHECK-LABEL: define void @_Z3funv() #0 prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @_Z3funv to i64)) to i32) }> {
+void fun() {}
+
+// CHECK-LABEL: define void @_Z6callerPFvvE(void ()* %f)
+// CHECK: getelementptr <{ i32, i32 }>, <{ i32, i32 }>* {{.*}}, i32 0, i32 0, !nosanitize
+// CHECK: load i32, i32* {{.*}}, align {{.*}}, !nosanitize
+// CHECK: icmp eq i32 {{.*}}, 846595819, !nosanitize
+// CHECK: br i1 {{.*}}, label %[[LABEL1:.*]], label %[[LABEL4:.*]], !nosanitize
+// CHECK: [[LABEL1]]:
+// CHECK: getelementptr <{ i32, i32 }>, <{ i32, i32 }>* {{.*}}, i32 0, i32 1, !nosanitize
+// CHECK: load i32, i32* {{.*}}, align {{.*}}, !nosanitize
+// CHECK: icmp eq i8* {{.*}}, bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*), !nosanitize
+// CHECK: br i1 {{.*}}, label %[[LABEL3:.*]], label %[[LABEL2:[^,]*]], {{.*}}!nosanitize
+// CHECK: [[LABEL2]]:
+// CHECK: call void @__ubsan_handle_function_type_mismatch_v1_abort(i8* {{.*}}, i64 {{.*}}, i64 {{.*}}, i64 {{.*}}) #{{.*}}, !nosanitize
+// CHECK-NOT: unreachable
+// CHECK: br label %[[LABEL3]], !nosanitize
+// CHECK: [[LABEL3]]:
+// CHECK: br label %[[LABEL4]], !nosanitize
+void caller(void (*f)()) { f(); }
diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c
index 01367c7e67881..2896eda5aaa0f 100644
--- a/clang/test/Driver/fsanitize.c
+++ b/clang/test/Driver/fsanitize.c
@@ -759,9 +759,12 @@
 // CHECK-TSAN-MINIMAL: error: invalid argument '-fsanitize-minimal-runtime' not allowed with '-fsanitize=thread'
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-MINIMAL
-// CHECK-UBSAN-MINIMAL: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|builtin|returns-nonnull-attribute|nonnull-attribute|function),?){18}"}}
+// CHECK-UBSAN-MINIMAL: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|builtin|returns-nonnull-attribute|nonnull-attribute),?){17}"}}
 // CHECK-UBSAN-MINIMAL: "-fsanitize-minimal-runtime"
 
+// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize=function -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-MINIMAL
+// CHECK-UBSAN-FUNCTION-MINIMAL: error: invalid argument '-fsanitize=function' not allowed with '-fsanitize-minimal-runtime'
+
 // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize=vptr -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-VPTR-MINIMAL
 // CHECK-UBSAN-VPTR-MINIMAL: error: invalid argument '-fsanitize=vptr' not allowed with '-fsanitize-minimal-runtime'
 
diff --git a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc
index 839bba3691f91..9c324cc19a11f 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc
+++ b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc
@@ -185,18 +185,17 @@ static bool handleFunctionTypeMismatch(FunctionTypeMismatchData *Data,
   return true;
 }
 
-void __ubsan_handle_function_type_mismatch(FunctionTypeMismatchData *Data,
-                                           ValueHandle Function,
-                                           ValueHandle calleeRTTI,
-                                           ValueHandle fnRTTI) {
+void __ubsan_handle_function_type_mismatch_v1(FunctionTypeMismatchData *Data,
+                                              ValueHandle Function,
+                                              ValueHandle calleeRTTI,
+                                              ValueHandle fnRTTI) {
   GET_REPORT_OPTIONS(false);
   handleFunctionTypeMismatch(Data, Function, calleeRTTI, fnRTTI, Opts);
 }
 
-void __ubsan_handle_function_type_mismatch_abort(FunctionTypeMismatchData *Data,
-                                                 ValueHandle Function,
-                                                 ValueHandle calleeRTTI,
-                                                 ValueHandle fnRTTI) {
+void __ubsan_handle_function_type_mismatch_v1_abort(
+    FunctionTypeMismatchData *Data, ValueHandle Function,
+    ValueHandle calleeRTTI, ValueHandle fnRTTI) {
   GET_REPORT_OPTIONS(true);
   if (handleFunctionTypeMismatch(Data, Function, calleeRTTI, fnRTTI, Opts))
     Die();
diff --git a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.h b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.h
index be2345dc166eb..f7b9fc54f4724 100644
--- a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.h
+++ b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.h
@@ -40,14 +40,15 @@ struct FunctionTypeMismatchData {
 };
 
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
-__ubsan_handle_function_type_mismatch(FunctionTypeMismatchData *Data,
-                                      ValueHandle Val, ValueHandle calleeRTTI,
-                                      ValueHandle fnRTTI);
+__ubsan_handle_function_type_mismatch_v1(FunctionTypeMismatchData *Data,
+                                         ValueHandle Val,
+                                         ValueHandle calleeRTTI,
+                                         ValueHandle fnRTTI);
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
-__ubsan_handle_function_type_mismatch_abort(FunctionTypeMismatchData *Data,
-                                            ValueHandle Val,
-                                            ValueHandle calleeRTTI,
-                                            ValueHandle fnRTTI);
+__ubsan_handle_function_type_mismatch_v1_abort(FunctionTypeMismatchData *Data,
+                                               ValueHandle Val,
+                                               ValueHandle calleeRTTI,
+                                               ValueHandle fnRTTI);
 }
 
 #endif // UBSAN_HANDLERS_H
diff --git a/compiler-rt/lib/ubsan/ubsan_interface.inc b/compiler-rt/lib/ubsan/ubsan_interface.inc
index 3eb07b7b9fe35..1e44bc2171ded 100644
--- a/compiler-rt/lib/ubsan/ubsan_interface.inc
+++ b/compiler-rt/lib/ubsan/ubsan_interface.inc
@@ -21,8 +21,8 @@ INTERFACE_FUNCTION(__ubsan_handle_dynamic_type_cache_miss)
 INTERFACE_FUNCTION(__ubsan_handle_dynamic_type_cache_miss_abort)
 INTERFACE_FUNCTION(__ubsan_handle_float_cast_overflow)
 INTERFACE_FUNCTION(__ubsan_handle_float_cast_overflow_abort)
-INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch)
-INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch_abort)
+INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch_v1)
+INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch_v1_abort)
 INTERFACE_FUNCTION(__ubsan_handle_implicit_conversion)
 INTERFACE_FUNCTION(__ubsan_handle_implicit_conversion_abort)
 INTERFACE_FUNCTION(__ubsan_handle_invalid_builtin)
diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
index 31baa2af8ca9d..07402fdcd7085 100644
--- a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
+++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp
@@ -1,11 +1,53 @@
-// RUN: %clangxx -std=c++17 -fsanitize=function %s -O3 -g -o %t
-// RUN: %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx -DDETERMINE_UNIQUE %s -o %t-unique
+// RUN: %clangxx -std=c++17 -fsanitize=function %s -O3 -g -DSHARED_LIB -fPIC -shared -o %t-so.so
+// RUN: %clangxx -std=c++17 -fsanitize=function %s -O3 -g -o %t %t-so.so
+// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK $(%run %t-unique UNIQUE)
 // Verify that we can disable symbolization if needed:
-// RUN: %env_ubsan_opts=symbolize=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOSYM
+// RUN: %env_ubsan_opts=symbolize=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOSYM $(%run %t-unique NOSYM-UNIQUE)
 // XFAIL: windows-msvc
 // Unsupported function flag
 // UNSUPPORTED: openbsd
 
+#ifdef DETERMINE_UNIQUE
+
+#include <iostream>
+
+#include "../../../../../lib/sanitizer_common/sanitizer_platform.h"
+
+int main(int, char **argv) {
+  if (!SANITIZER_NON_UNIQUE_TYPEINFO)
+    std::cout << "--check-prefix=" << argv[1];
+}
+
+#else
+
+struct Shared {};
+using FnShared = void (*)(Shared *);
+FnShared getShared();
+
+struct __attribute__((visibility("hidden"))) Hidden {};
+using FnHidden = void (*)(Hidden *);
+FnHidden getHidden();
+
+namespace {
+struct Private {};
+} // namespace
+using FnPrivate = void (*)(void *);
+FnPrivate getPrivate();
+
+#ifdef SHARED_LIB
+
+void fnShared(Shared *) {}
+FnShared getShared() { return fnShared; }
+
+void fnHidden(Hidden *) {}
+FnHidden getHidden() { return fnHidden; }
+
+void fnPrivate(Private *) {}
+FnPrivate getPrivate() { return reinterpret_cast<FnPrivate>(fnPrivate); }
+
+#else
+
 #include <stdint.h>
 
 void f() {}
@@ -64,12 +106,31 @@ void check_noexcept_calls() {
   p2(0);
 }
 
+void check_cross_dso() {
+  getShared()(nullptr);
+
+  // UNIQUE: function.cpp:[[@LINE+2]]:3: runtime error: call to function fnHidden(Hidden*) through pointer to incorrect function type 'void (*)(Hidden *)'
+  // NOSYM-UNIQUE: function.cpp:[[@LINE+1]]:3: runtime error: call to function (unknown) through pointer to incorrect function type 'void (*)(Hidden *)'
+  getHidden()(nullptr);
+
+  // TODO: Unlike GCC, Clang fails to prefix the typeinfo name for the function
+  // type with "*", so this erroneously only fails for "*UNIQUE":
+  // UNIQUE: function.cpp:[[@LINE+2]]:3: runtime error: call to function fnPrivate((anonymous namespace)::Private*) through pointer to incorrect function type 'void (*)((anonymous namespace)::Private *)'
+  // NOSYM-UNIQUE: function.cpp:[[@LINE+1]]:3: runtime error: call to function (unknown) through pointer to incorrect function type 'void (*)((anonymous namespace)::Private *)'
+  reinterpret_cast<void (*)(Private *)>(getPrivate())(nullptr);
+}
+
 int main(void) {
   make_valid_call();
   make_invalid_call();
   check_noexcept_calls();
+  check_cross_dso();
   // Check that no more errors will be printed.
   // CHECK-NOT: runtime error: call to function
   // NOSYM-NOT: runtime error: call to function
   make_invalid_call();
 }
+
+#endif
+
+#endif

From d0ac1888aab490589788bd51a9f44f7745dc5819 Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Tue, 16 Jul 2019 06:34:44 +0000
Subject: [PATCH 214/451] [lldb] Handle EOF from `lldb-vscode`

Sometimes (when running lldb-vscode under strace) I get:

read(0, "", 16)                         = 0
read(0, "", 16)                         = 0
read(0, "", 16)                         = 0
...

With this patch testcases finish properly even with strace:

read(0, "", 16)                         = 0
futex(0x1346508, FUTEX_WAKE_PRIVATE, 2147483647) = 0
stat("", 0x7ffe8f2634c8)                = -1 ENOENT (No such file or directory)
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=9124, si_uid=1001, si_status=SIGINT, si_utime=1, si_stime=0} ---
close(4)                                = 0
exit_group(0)                           = ?
+++ exited with 0 +++

Differential Revision: https://reviews.llvm.org/D64698

llvm-svn: 366187
---
 lldb/tools/lldb-vscode/IOStream.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-vscode/IOStream.cpp
index e07ae079f7ede..4b11b90b4c2e8 100644
--- a/lldb/tools/lldb-vscode/IOStream.cpp
+++ b/lldb/tools/lldb-vscode/IOStream.cpp
@@ -101,6 +101,11 @@ bool InputStream::read_full(std::ofstream *log, size_t length,
     else
       bytes_read = ::read(descriptor.m_fd, ptr, length);
 
+    if (bytes_read == 0) {
+      if (log)
+        *log << "End of file (EOF) reading from input file.\n";
+      return false;
+    }
     if (bytes_read < 0) {
       int reason = 0;
 #if defined(_WIN32)

From 860f7ec05871d36b519ba31295670963f2fd3f95 Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin@accesssoftek.com>
Date: Tue, 16 Jul 2019 06:53:06 +0000
Subject: [PATCH 215/451] [DWARF] Simplify DWARFAttribute. NFC.

The first argument in the constructor was ignored, and the remaining
arguments were always passed as their defaults.

Differential Revision: https://reviews.llvm.org/D64407

llvm-svn: 366188
---
 llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h | 12 +-----------
 llvm/lib/DebugInfo/DWARF/DWARFDie.cpp              |  4 ++--
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
index 96e622ccc0331..c8ad19ad6bf64 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h
@@ -27,13 +27,10 @@ struct DWARFAttribute {
   /// The debug info/types section byte size of the data for this attribute.
   uint32_t ByteSize = 0;
   /// The attribute enumeration of this attribute.
-  dwarf::Attribute Attr;
+  dwarf::Attribute Attr = dwarf::Attribute(0);
   /// The form and value for this attribute.
   DWARFFormValue Value;
 
-  DWARFAttribute(uint32_t O, dwarf::Attribute A = dwarf::Attribute(0),
-                 dwarf::Form F = dwarf::Form(0)) : Attr(A), Value(F) {}
-
   bool isValid() const {
     return Offset != 0 && Attr != dwarf::Attribute(0);
   }
@@ -45,13 +42,6 @@ struct DWARFAttribute {
   /// Identifies DWARF attributes that may contain a reference to a
   /// DWARF expression.
   static bool mayHaveLocationDescription(dwarf::Attribute Attr);
-
-  void clear() {
-    Offset = 0;
-    ByteSize = 0;
-    Attr = dwarf::Attribute(0);
-    Value = DWARFFormValue();
-  }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 6212842983669..d638dc4239f47 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -663,7 +663,7 @@ iterator_range<DWARFDie::attribute_iterator> DWARFDie::attributes() const {
 }
 
 DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End)
-    : Die(D), AttrValue(0), Index(0) {
+    : Die(D), Index(0) {
   auto AbbrDecl = Die.getAbbreviationDeclarationPtr();
   assert(AbbrDecl && "Must have abbreviation declaration");
   if (End) {
@@ -693,7 +693,7 @@ void DWARFDie::attribute_iterator::updateForIndex(
     AttrValue.ByteSize = ParseOffset - AttrValue.Offset;
   } else {
     assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only");
-    AttrValue.clear();
+    AttrValue = {};
   }
 }
 

From 74c350af2181cf21085f69bd2d9bcf4d6d2dc03c Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin@accesssoftek.com>
Date: Tue, 16 Jul 2019 06:56:10 +0000
Subject: [PATCH 216/451] [DWARF] Fix an incorrect format specifier.

This adjusts the format specifier because PCOffset is uint16_t.

Differential Revision: https://reviews.llvm.org/D64620

llvm-svn: 366189
---
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 8a621084710e4..77b4688c23465 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -758,7 +758,7 @@ Error DWARFDebugLine::LineTable::parse(
           State.Row.Address.Address += PCOffset;
           if (OS)
             *OS
-                << format(" (0x%16.16" PRIx64 ")", PCOffset);
+                << format(" (0x%4.4" PRIx16 ")", PCOffset);
         }
         break;
 

From f48bc0181232be2499afe84f478bcda5699931af Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin@accesssoftek.com>
Date: Tue, 16 Jul 2019 07:01:08 +0000
Subject: [PATCH 217/451] [DWARF] Fix the reserved values for unit length in
 DWARFDebugLine.

The DWARF3 documentation had an inconsistency concerning the reserved range
for unit length values. The issue was fixed in DWARF4.

Differential Revision: https://reviews.llvm.org/D64622

llvm-svn: 366190
---
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp          |  4 ++--
 .../unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 77b4688c23465..3ee5652a0eb22 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -304,7 +304,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
   if (TotalLength == UINT32_MAX) {
     FormParams.Format = dwarf::DWARF64;
     TotalLength = DebugLineData.getU64(OffsetPtr);
-  } else if (TotalLength >= 0xffffff00) {
+  } else if (TotalLength >= 0xfffffff0) {
     return createStringError(errc::invalid_argument,
         "parsing line table prologue at offset 0x%8.8" PRIx64
         " unsupported reserved unit length found of value 0x%8.8" PRIx64,
@@ -1091,7 +1091,7 @@ DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data,
 }
 
 bool DWARFDebugLine::Prologue::totalLengthIsValid() const {
-  return TotalLength == 0xffffffff || TotalLength < 0xffffff00;
+  return TotalLength == 0xffffffff || TotalLength < 0xfffffff0;
 }
 
 DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext(
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
index fef8dc50fb467..ce7c252d83ee8 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp
@@ -291,13 +291,13 @@ TEST_F(DebugLineBasicFixture, ErrorForReservedLength) {
     return;
 
   LineTable &LT = Gen->addLineTable();
-  LT.setCustomPrologue({{0xffffff00, LineTable::Long}});
+  LT.setCustomPrologue({{0xfffffff0, LineTable::Long}});
 
   generate();
 
   checkGetOrParseLineTableEmitsError(
       "parsing line table prologue at offset 0x00000000 unsupported reserved "
-      "unit length found of value 0xffffff00");
+      "unit length found of value 0xfffffff0");
 }
 
 TEST_F(DebugLineBasicFixture, ErrorForLowVersion) {
@@ -532,7 +532,7 @@ TEST_F(DebugLineBasicFixture, ParserMovesToEndForBadLengthWhenParsing) {
     return;
 
   LineTable &LT = Gen->addLineTable();
-  LT.setCustomPrologue({{0xffffff00, LineTable::Long}});
+  LT.setCustomPrologue({{0xfffffff0, LineTable::Long}});
   Gen->addLineTable();
   generate();
 
@@ -544,7 +544,7 @@ TEST_F(DebugLineBasicFixture, ParserMovesToEndForBadLengthWhenParsing) {
   EXPECT_FALSE(Recoverable);
 
   checkError("parsing line table prologue at offset 0x00000000 unsupported "
-             "reserved unit length found of value 0xffffff00",
+             "reserved unit length found of value 0xfffffff0",
              std::move(Unrecoverable));
 }
 
@@ -553,7 +553,7 @@ TEST_F(DebugLineBasicFixture, ParserMovesToEndForBadLengthWhenSkipping) {
     return;
 
   LineTable &LT = Gen->addLineTable();
-  LT.setCustomPrologue({{0xffffff00, LineTable::Long}});
+  LT.setCustomPrologue({{0xfffffff0, LineTable::Long}});
   Gen->addLineTable();
   generate();
 
@@ -564,7 +564,7 @@ TEST_F(DebugLineBasicFixture, ParserMovesToEndForBadLengthWhenSkipping) {
   EXPECT_TRUE(Parser.done());
 
   checkError("parsing line table prologue at offset 0x00000000 unsupported "
-             "reserved unit length found of value 0xffffff00",
+             "reserved unit length found of value 0xfffffff0",
              std::move(Unrecoverable));
 }
 

From a54c46674efbf045d661831d727d4c48be26a7d1 Mon Sep 17 00:00:00 2001
From: Zi Xuan Wu <wuzish@cn.ibm.com>
Date: Tue, 16 Jul 2019 07:54:47 +0000
Subject: [PATCH 218/451] [NFC][PowerPC] Add test case for D64195

llvm-svn: 366191
---
 .../CodeGen/PowerPC/float-load-store-pair.ll  | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/float-load-store-pair.ll

diff --git a/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll b/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll
new file mode 100644
index 0000000000000..6a8bd8e7a57b5
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s -mcpu=pwr9 -mtriple=powerpc64le-ibm-linux| FileCheck %s
+
+; This file verifies that for a given floating point load / store pair,
+; if the load value isn't used by any other operations,
+; then consider transforming the pair to integer load / store operations
+
+@a1 = local_unnamed_addr global double 0.000000e+00, align 8
+@a2 = local_unnamed_addr global double 0.000000e+00, align 8
+@a3 = local_unnamed_addr global double 0.000000e+00, align 8
+@a4 = local_unnamed_addr global double 0.000000e+00, align 8
+@a5 = local_unnamed_addr global double 0.000000e+00, align 8
+@a6 = local_unnamed_addr global double 0.000000e+00, align 8
+@a7 = local_unnamed_addr global double 0.000000e+00, align 8
+@a8 = local_unnamed_addr global double 0.000000e+00, align 8
+@a9 = local_unnamed_addr global double 0.000000e+00, align 8
+@a10 = local_unnamed_addr global double 0.000000e+00, align 8
+@a11 = local_unnamed_addr global double 0.000000e+00, align 8
+@a12 = local_unnamed_addr global double 0.000000e+00, align 8
+@a13 = local_unnamed_addr global double 0.000000e+00, align 8
+@a14 = local_unnamed_addr global double 0.000000e+00, align 8
+@a15 = local_unnamed_addr global double 0.000000e+00, align 8
+@a16 = local_unnamed_addr global ppc_fp128 0xM00000000000000000000000000000000, align 16
+@a17 = local_unnamed_addr global fp128 0xL00000000000000000000000000000000, align 16
+
+; Because this test function is trying to pass float argument by stack,
+; so the fpr is only used to load/store float argument
+define signext i32 @test() {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -192(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addis 3, 2, a1@toc@ha
+; CHECK-NEXT:    lfd 1, a1@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a2@toc@ha
+; CHECK-NEXT:    lfd 2, a2@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a3@toc@ha
+; CHECK-NEXT:    lfd 3, a3@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a4@toc@ha
+; CHECK-NEXT:    lfd 4, a4@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a5@toc@ha
+; CHECK-NEXT:    lfd 5, a5@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a6@toc@ha
+; CHECK-NEXT:    lfd 6, a6@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a7@toc@ha
+; CHECK-NEXT:    lfd 7, a7@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a8@toc@ha
+; CHECK-NEXT:    lfd 8, a8@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a9@toc@ha
+; CHECK-NEXT:    lfd 9, a9@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a10@toc@ha
+; CHECK-NEXT:    lfd 10, a10@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a11@toc@ha
+; CHECK-NEXT:    lfd 11, a11@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a12@toc@ha
+; CHECK-NEXT:    lfd 12, a12@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a13@toc@ha
+; CHECK-NEXT:    lfd 13, a13@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a14@toc@ha
+; CHECK-NEXT:    lfd 0, a14@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a15@toc@ha
+; CHECK-NEXT:    addis 4, 2, a17@toc@ha
+; CHECK-NEXT:    addi 4, 4, a17@toc@l
+; CHECK-NEXT:    lxsd 2, a15@toc@l(3)
+; CHECK-NEXT:    addis 3, 2, a16@toc@ha
+; CHECK-NEXT:    addi 3, 3, a16@toc@l
+; CHECK-NEXT:    lxvx 36, 0, 4
+; CHECK-NEXT:    lxvx 35, 0, 3
+; CHECK-NEXT:    li 3, 168
+; CHECK-NEXT:    stxvx 36, 1, 3
+; CHECK-NEXT:    li 3, 152
+; CHECK-NEXT:    stxvx 35, 1, 3
+; CHECK-NEXT:    stxsd 2, 144(1)
+; CHECK-NEXT:    stfd 0, 136(1)
+; CHECK-NEXT:    bl _Z3fooddddddddddddddd
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    addi 1, 1, 192
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+%1 = load double, double* @a1, align 8
+%2 = load double, double* @a2, align 8
+%3 = load double, double* @a3, align 8
+%4 = load double, double* @a4, align 8
+%5 = load double, double* @a5, align 8
+%6 = load double, double* @a6, align 8
+%7 = load double, double* @a7, align 8
+%8 = load double, double* @a8, align 8
+%9 = load double, double* @a9, align 8
+%10 = load double, double* @a10, align 8
+%11 = load double, double* @a11, align 8
+%12 = load double, double* @a12, align 8
+%13 = load double, double* @a13, align 8
+%14 = load double, double* @a14, align 8
+%15 = load double, double* @a15, align 8
+%16 = load ppc_fp128, ppc_fp128* @a16, align 16
+%17 = load fp128, fp128* @a17, align 16
+tail call void @_Z3fooddddddddddddddd(double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8, double %9, double %10, double %11, double %12, double %13, double %14, double %15, ppc_fp128 %16, fp128 %17)
+ret i32 0
+}
+
+declare void @_Z3fooddddddddddddddd(double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, ppc_fp128, fp128)

From 33fdf82dda0a687133016cc41cffd4ece6693d69 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Tue, 16 Jul 2019 08:08:17 +0000
Subject: [PATCH 219/451] [WebAssembly] Rename variable references in comments
 after VariableName -> variableName change

llvm-svn: 366192
---
 lld/wasm/InputFiles.cpp        | 2 +-
 lld/wasm/MarkLive.cpp          | 4 ++--
 lld/wasm/SymbolTable.h         | 2 +-
 lld/wasm/SyntheticSections.cpp | 4 ++--
 lld/wasm/Writer.cpp            | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index b5233cf09ed00..b4945469f931c 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -328,7 +328,7 @@ void ObjFile::parse(bool ignoreComdats) {
   for (const WasmEvent &e : wasmObj->events())
     events.emplace_back(make<InputEvent>(types[e.Type.SigIndex], e, this));
 
-  // Populate `Symbols` based on the WasmSymbols in the object.
+  // Populate `Symbols` based on the symbols in the object.
   symbols.reserve(wasmObj->getNumberOfSymbols());
   for (const SymbolRef &sym : wasmObj->symbols()) {
     const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
index 703daf27e01cf..9399156b748ce 100644
--- a/lld/wasm/MarkLive.cpp
+++ b/lld/wasm/MarkLive.cpp
@@ -46,9 +46,9 @@ void lld::wasm::markLive() {
     if (InputChunk *chunk = sym->getChunk())
       q.push_back(chunk);
 
-    // The ctor functions are all referenced by the synthetic CallCtors
+    // The ctor functions are all referenced by the synthetic callCtors
     // function.  However, this function does not contain relocations so we
-    // have to manually mark the ctors as live if CallCtors itself is live.
+    // have to manually mark the ctors as live if callCtors itself is live.
     if (sym == WasmSym::callCtors) {
       if (config->passiveSegments)
         enqueue(WasmSym::initMemory);
diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index 33f02ddaf9101..530d5e864103d 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -98,7 +98,7 @@ class SymbolTable {
   InputFunction *replaceWithUnreachable(Symbol *sym, const WasmSignature &sig,
                                         StringRef debugName);
 
-  // Maps symbol names to index into the SymVector.  -1 means that symbols
+  // Maps symbol names to index into the symVector.  -1 means that symbols
   // is to not yet in the vector but it should have tracing enabled if it is
   // ever added.
   llvm::DenseMap<llvm::CachedHashStringRef, int> symMap;
diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 020d2c0b99265..6d5d14ff40e64 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -474,8 +474,8 @@ void NameSection::writeBody() {
   SubSection sub(WASM_NAMES_FUNCTION);
   writeUleb128(sub.os, numNames(), "name count");
 
-  // Names must appear in function index order.  As it happens ImportedSymbols
-  // and InputFunctions are numbered in order with imported functions coming
+  // Names must appear in function index order.  As it happens importedSymbols
+  // and inputFunctions are numbered in order with imported functions coming
   // first.
   for (const Symbol *s : out.importSec->importedSymbols) {
     if (auto *f = dyn_cast<FunctionSymbol>(s)) {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 5d81f2305c9d9..77a29a2d99ef4 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -191,7 +191,7 @@ void Writer::writeSections() {
 // to each of the input data sections as well as the explicit stack region.
 // The default memory layout is as follows, from low to high.
 //
-//  - initialized data (starting at Config->GlobalBase)
+//  - initialized data (starting at Config->globalBase)
 //  - BSS data (not currently implemented in llvm)
 //  - explicit stack (Config->ZStackSize)
 //  - heap start / unallocated

From 2e2038b6470d4fdcdfd29bd111e67f12f688cef0 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Tue, 16 Jul 2019 08:26:38 +0000
Subject: [PATCH 220/451] [COFF] Rename variable references in comments after
 VariableName -> variableName change

llvm-svn: 366193
---
 lld/COFF/Chunks.cpp      | 8 ++++----
 lld/COFF/Config.h        | 2 +-
 lld/COFF/DLL.cpp         | 2 +-
 lld/COFF/Driver.cpp      | 8 ++++----
 lld/COFF/DriverUtils.cpp | 2 +-
 lld/COFF/InputFiles.cpp  | 8 ++++----
 lld/COFF/InputFiles.h    | 2 +-
 lld/COFF/PDB.cpp         | 6 +++---
 lld/COFF/SymbolTable.cpp | 2 +-
 lld/COFF/SymbolTable.h   | 2 +-
 lld/COFF/Writer.cpp      | 6 +++---
 11 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 374751ad510ac..0e43d2b478b40 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -31,10 +31,10 @@ namespace coff {
 
 SectionChunk::SectionChunk(ObjFile *f, const coff_section *h)
     : Chunk(SectionKind), file(f), header(h), repl(this) {
-  // Initialize Relocs.
+  // Initialize relocs.
   setRelocs(file->getCOFFObj()->getRelocations(header));
 
-  // Initialize SectionName.
+  // Initialize sectionName.
   StringRef sectionName;
   if (Expected<StringRef> e = file->getCOFFObj()->getSectionName(header))
     sectionName = *e;
@@ -218,7 +218,7 @@ void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift) {
 
 // Update the immediate field in a AARCH64 ldr, str, and add instruction.
 // Optionally limit the range of the written immediate by one or more bits
-// (RangeLimit).
+// (rangeLimit).
 void applyArm64Imm(uint8_t *off, uint64_t imm, uint32_t rangeLimit) {
   uint32_t orig = read32le(off);
   imm += (orig >> 10) & 0xFFF;
@@ -561,7 +561,7 @@ void SectionChunk::getRuntimePseudoRelocs(
             toString(file));
       continue;
     }
-    // SizeInBits is used to initialize the Flags field; currently no
+    // sizeInBits is used to initialize the Flags field; currently no
     // other flags are defined.
     res.emplace_back(
         RuntimePseudoReloc(target, this, rel.VirtualAddress, sizeInBits));
diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index f7c8097147654..e378b6fc72484 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -49,7 +49,7 @@ struct Export {
 
   // If an export is a form of /export:foo=dllname.bar, that means
   // that foo should be exported as an alias to bar in the DLL.
-  // ForwardTo is set to "dllname.bar" part. Usually empty.
+  // forwardTo is set to "dllname.bar" part. Usually empty.
   StringRef forwardTo;
   StringChunk *forwardChunk = nullptr;
 
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 932e7d64553ec..40d1f463aa3ff 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -567,7 +567,7 @@ void IdataContents::create() {
   // Create .idata contents for each DLL.
   for (std::vector<DefinedImportData *> &syms : v) {
     // Create lookup and address tables. If they have external names,
-    // we need to create HintName chunks to store the names.
+    // we need to create hintName chunks to store the names.
     // If they don't (if they are import-by-ordinals), we store only
     // ordinal values to the table.
     size_t base = lookups.size();
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index bb5c0bb711b99..6cfd83ab96b6c 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -936,7 +936,7 @@ static void findKeepUniqueSections() {
   }
 }
 
-// link.exe replaces each %foo% in AltPath with the contents of environment
+// link.exe replaces each %foo% in altPath with the contents of environment
 // variable foo, and adds the two magic env vars _PDB (expands to the basename
 // of pdb's output path) and _EXT (expands to the extension of the output
 // binary).
@@ -952,9 +952,9 @@ static void parsePDBAltPath(StringRef altPath) {
     binaryExtension = binaryExtension.substr(1); // %_EXT% does not include '.'.
 
   // Invariant:
-  //   +--------- Cursor ('a...' might be the empty string).
-  //   |   +----- FirstMark
-  //   |   |   +- SecondMark
+  //   +--------- cursor ('a...' might be the empty string).
+  //   |   +----- firstMark
+  //   |   |   +- secondMark
   //   v   v   v
   //   a...%...%...
   size_t cursor = 0;
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index edc3b3707c7b6..4360ac23b2622 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -345,7 +345,7 @@ class TemporaryFile {
   // so it is safe to remove the file immediately after this function
   // is called (you cannot remove an opened file on Windows.)
   std::unique_ptr<MemoryBuffer> getMemoryBuffer() {
-    // IsVolatileSize=true forces MemoryBuffer to not use mmap().
+    // IsVolatile=true forces MemoryBuffer to not use mmap().
     return CHECK(MemoryBuffer::getFile(path, /*FileSize=*/-1,
                                        /*RequiresNullTerminator=*/false,
                                        /*IsVolatile=*/true),
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 9936a6f69ffed..c00d5c5b494ef 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -556,8 +556,8 @@ Optional<Symbol *> ObjFile::createDefined(
   // The second symbol entry has the name of the comdat symbol, called the
   // "comdat leader".
   // When this function is called for the first symbol entry of a comdat,
-  // it sets ComdatDefs and returns None, and when it's called for the second
-  // symbol entry it reads ComdatDefs and then sets it back to nullptr.
+  // it sets comdatDefs and returns None, and when it's called for the second
+  // symbol entry it reads comdatDefs and then sets it back to nullptr.
 
   // Handle comdat leader.
   if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) {
@@ -626,7 +626,7 @@ ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
 // even if the TU was compiled with no debug info. At least two records are
 // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the
 // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is
-// currently used to initialize the HotPatchable member.
+// currently used to initialize the hotPatchable member.
 void ObjFile::initializeFlags() {
   ArrayRef<uint8_t> data = getDebugSection(".debug$S");
   if (data.empty())
@@ -764,7 +764,7 @@ void ImportFile::parse() {
 
   impSym = symtab->addImportData(impName, this);
   // If this was a duplicate, we logged an error but may continue;
-  // in this case, ImpSym is nullptr.
+  // in this case, impSym is nullptr.
   if (!impSym)
     return;
 
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 6c84ceec13dbd..dfad9814a397d 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -119,7 +119,7 @@ class ObjFile : public InputFile {
 
   ArrayRef<uint8_t> getDebugSection(StringRef secName);
 
-  // Returns a Symbol object for the SymbolIndex'th symbol in the
+  // Returns a Symbol object for the symbolIndex'th symbol in the
   // underlying object file.
   Symbol *getSymbol(uint32_t symbolIndex) {
     return symbols[symbolIndex];
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index d024f7573f3da..a55e5136e040b 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -119,7 +119,7 @@ class PDBLinker {
   ///
   /// If the object does not use a type server PDB (compiled with /Z7), we merge
   /// all the type and item records from the .debug$S stream and fill in the
-  /// caller-provided ObjectIndexMap.
+  /// caller-provided objectIndexMap.
   Expected<const CVIndexMap &> mergeDebugT(ObjFile *file,
                                            CVIndexMap *objectIndexMap);
 
@@ -683,7 +683,7 @@ static void translateIdSymbols(MutableArrayRef<uint8_t> &recordData,
 
     TypeIndex *ti =
         reinterpret_cast<TypeIndex *>(content.data() + refs[0].Offset);
-    // `TI` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in
+    // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in
     // the IPI stream, whose `FunctionType` member refers to the TPI stream.
     // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and
     // in both cases we just need the second type index.
@@ -1729,7 +1729,7 @@ static bool findLineTable(const SectionChunk *c, uint32_t addr,
     if (dbgC->getSectionName() != ".debug$S")
       continue;
 
-    // Build a mapping of SECREL relocations in DbgC that refer to C.
+    // Build a mapping of SECREL relocations in dbgC that refer to `c`.
     DenseMap<uint32_t, uint32_t> secrels;
     for (const coff_relocation &r : dbgC->getRelocs()) {
       if (r.Type != secrelReloc)
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 2173c10c1ca56..0aff164ee5677 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -192,7 +192,7 @@ bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
 
   // Replace the reference directly to a variable with a reference
   // to the import address table instead. This obviously isn't right,
-  // but we mark the symbol as IsRuntimePseudoReloc, and a later pass
+  // but we mark the symbol as isRuntimePseudoReloc, and a later pass
   // will add runtime pseudo relocations for every relocation against
   // this Symbol. The runtime pseudo relocation framework expects the
   // reference itself to point at the IAT entry.
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index 75bd3933547b0..88f47cbe9e78a 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -112,7 +112,7 @@ class SymbolTable {
 private:
   /// Inserts symbol if not already present.
   std::pair<Symbol *, bool> insert(StringRef name);
-  /// Same as insert(Name), but also sets IsUsedInRegularObj.
+  /// Same as insert(Name), but also sets isUsedInRegularObj.
   std::pair<Symbol *, bool> insert(StringRef name, InputFile *f);
 
   std::vector<Symbol *> getSymsWithPrefix(StringRef prefix);
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 46b1af0934ab3..e4b35a5f8beb9 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -335,7 +335,7 @@ void OutputSection::addContributingPartialSection(PartialSection *sec) {
 } // namespace lld
 
 // Check whether the target address S is in range from a relocation
-// of type RelType at address P.
+// of type relType at address P.
 static bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
   if (config->machine == ARMNT) {
     int64_t diff = AbsoluteDifference(s, p + 4) + margin;
@@ -427,7 +427,7 @@ static bool createThunks(OutputSection *os, int margin) {
 
       // The estimate of the source address P should be pretty accurate,
       // but we don't know whether the target Symbol address should be
-      // offset by ThunkSize or not (or by some of ThunksSize but not all of
+      // offset by thunksSize or not (or by some of thunksSize but not all of
       // it), giving us some uncertainty once we have added one thunk.
       uint64_t p = sc->getRVA() + rel.VirtualAddress + thunksSize;
 
@@ -1626,7 +1626,7 @@ void Writer::markSymbolsForRVATable(ObjFile *file,
 }
 
 // Replace the absolute table symbol with a synthetic symbol pointing to
-// TableChunk so that we can emit base relocations for it and resolve section
+// tableChunk so that we can emit base relocations for it and resolve section
 // relative relocations.
 void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
                               StringRef countSym) {

From a3077526277c10bda5395ceebdb6963f9c253651 Mon Sep 17 00:00:00 2001
From: Serge Guelton <sguelton@redhat.com>
Date: Tue, 16 Jul 2019 08:56:47 +0000
Subject: [PATCH 221/451] [clang-scan-view] Force utf-8 when handling report
 (python2 only)

Original patch by random human <random.bored.human@gmail.com>

Differential Revision: https://reviews.llvm.org/D64129

llvm-svn: 366194
---
 clang/tools/scan-view/share/ScanView.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/tools/scan-view/share/ScanView.py b/clang/tools/scan-view/share/ScanView.py
index c40366b2e8490..a6cc7692ffe00 100644
--- a/clang/tools/scan-view/share/ScanView.py
+++ b/clang/tools/scan-view/share/ScanView.py
@@ -764,11 +764,11 @@ def send_patched_file(self, path, ctype):
             variables['report'] = m.group(2)
 
         try:
-            f = open(path,'r')
+            f = open(path,'rb')
         except IOError:
             return self.send_404()
         fs = os.fstat(f.fileno())
-        data = f.read()
+        data = f.read().decode('utf-8')
         for a,b in kReportReplacements:
             data = a.sub(b % variables, data)
         return self.send_string(data, ctype, mtime=fs.st_mtime)

From a3e26d1a6cdfb5a3a97750863abb31e1e3fdd66b Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Tue, 16 Jul 2019 09:15:01 +0000
Subject: [PATCH 222/451] [NFC] Test commit: add full stop at end of comment

llvm-svn: 366195
---
 llvm/lib/Target/ARM/ARMInstrVFP.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 93c27e4630053..a0dd25de07eea 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -857,7 +857,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
 
       let Inst{17-16} = rm;
 
-      // Encode instruction operands
+      // Encode instruction operands.
       let Inst{3-0} = Dm{3-0};
       let Inst{5}   = Dm{4};
       let Inst{8} = 1;

From c5a2d7470e10576684bc9a74626d96db8ff069f1 Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Tue, 16 Jul 2019 09:27:02 +0000
Subject: [PATCH 223/451] [lldb] Rename Options.inc to CommandOptions.inc [NFC]

It seems having two Options.inc files in the same project is giving our
custom Xcode project a hard time. This patch renames the new Options.inc
to CommandOptions.inc to prevent this conflict.

llvm-svn: 366196
---
 lldb/source/Commands/CMakeLists.txt              | 2 +-
 lldb/source/Commands/CommandObjectBreakpoint.cpp | 2 +-
 lldb/source/Commands/CommandObjectHelp.cpp       | 2 +-
 lldb/source/Commands/CommandObjectSettings.cpp   | 6 +++---
 lldb/source/Commands/CommandObjectTarget.cpp     | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lldb/source/Commands/CMakeLists.txt b/lldb/source/Commands/CMakeLists.txt
index f3f96af7dab02..657da8caaf361 100644
--- a/lldb/source/Commands/CMakeLists.txt
+++ b/lldb/source/Commands/CMakeLists.txt
@@ -1,4 +1,4 @@
-lldb_tablegen(Options.inc -gen-lldb-option-defs
+lldb_tablegen(CommandOptions.inc -gen-lldb-option-defs
   SOURCE Options.td
   TARGET LLDBOptionsGen)
 
diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp
index a661ffc62512c..c33f3834cb13b 100644
--- a/lldb/source/Commands/CommandObjectBreakpoint.cpp
+++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp
@@ -1249,7 +1249,7 @@ static constexpr OptionDefinition g_breakpoint_list_options[] = {
   // FIXME: We need to add an "internal" command, and then add this sort of
   // thing to it. But I need to see it for now, and don't want to wait.
 #define LLDB_OPTIONS_breakpoint_list
-#include "Options.inc"
+#include "CommandOptions.inc"
 };
 
 #pragma mark List
diff --git a/lldb/source/Commands/CommandObjectHelp.cpp b/lldb/source/Commands/CommandObjectHelp.cpp
index ad53e03121f0d..ab557919d0a06 100644
--- a/lldb/source/Commands/CommandObjectHelp.cpp
+++ b/lldb/source/Commands/CommandObjectHelp.cpp
@@ -67,7 +67,7 @@ CommandObjectHelp::~CommandObjectHelp() = default;
 
 static constexpr OptionDefinition g_help_options[] = {
 #define LLDB_OPTIONS_help
-#include "Options.inc"
+#include "CommandOptions.inc"
 };
 
 llvm::ArrayRef<OptionDefinition>
diff --git a/lldb/source/Commands/CommandObjectSettings.cpp b/lldb/source/Commands/CommandObjectSettings.cpp
index 057c5de619cfa..55a0002c59973 100644
--- a/lldb/source/Commands/CommandObjectSettings.cpp
+++ b/lldb/source/Commands/CommandObjectSettings.cpp
@@ -23,7 +23,7 @@ using namespace lldb_private;
 
 static constexpr OptionDefinition g_settings_set_options[] = {
 #define LLDB_OPTIONS_settings_set
-#include "Options.inc"
+#include "CommandOptions.inc"
 };
 
 class CommandObjectSettingsSet : public CommandObjectRaw {
@@ -312,7 +312,7 @@ class CommandObjectSettingsShow : public CommandObjectParsed {
 
 static constexpr OptionDefinition g_settings_write_options[] = {
 #define LLDB_OPTIONS_settings_write
-#include "Options.inc"
+#include "CommandOptions.inc"
 };
 
 class CommandObjectSettingsWrite : public CommandObjectParsed {
@@ -435,7 +435,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed {
 
 static constexpr OptionDefinition g_settings_read_options[] = {
 #define LLDB_OPTIONS_settings_read
-#include "Options.inc"
+#include "CommandOptions.inc"
 };
 
 class CommandObjectSettingsRead : public CommandObjectParsed {
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index e8720157ab4cb..e913a28501f23 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -1968,7 +1968,7 @@ static constexpr OptionEnumValueElement g_sort_option_enumeration[] = {
 
 static constexpr OptionDefinition g_target_modules_dump_symtab_options[] = {
 #define LLDB_OPTIONS_target_modules_dump_symtab
-#include "Options.inc"
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTargetModulesDumpSymtab

From eb72138340ce36f3bdd29658eb2ff730cbaa25d7 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Tue, 16 Jul 2019 09:27:39 +0000
Subject: [PATCH 224/451] [AArch64] Implement __jcvt intrinsic from Armv8.3-A

The jcvt intrinsic defined in ACLE [1] is available when __ARM_FEATURE_JCVT is defined.

This change introduces the AArch64 intrinsic, wires it up to the instruction and a new clang builtin function.
The __ARM_FEATURE_JCVT macro is now defined when an Armv8.3-A or higher target is used.
I've implemented the target detection logic in Clang so that this feature is enabled for architectures from armv8.3-a onwards (so -march=armv8.4-a also enables this, for example).

make check-all didn't show any new failures.

[1] https://developer.arm.com/docs/101028/latest/data-processing-intrinsics

Differential Revision: https://reviews.llvm.org/D64495

llvm-svn: 366197
---
 clang/include/clang/Basic/BuiltinsAArch64.def |  2 +
 clang/lib/Basic/Targets/AArch64.cpp           | 37 +++++++++++++++++++
 clang/lib/Basic/Targets/AArch64.h             |  6 +++
 clang/lib/CodeGen/CGBuiltin.cpp               |  8 ++++
 clang/lib/Headers/arm_acle.h                  |  8 ++++
 clang/test/CodeGen/arm_acle.c                 | 11 ++++++
 clang/test/CodeGen/builtins-arm64.c           |  6 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  2 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  4 +-
 llvm/test/CodeGen/AArch64/fjcvtzs.ll          | 10 +++++
 llvm/utils/git-svn/git-llvm                   |  2 +-
 11 files changed, 94 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/fjcvtzs.ll

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 5ba03da4a7a05..7701ad98f4832 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -65,6 +65,8 @@ BUILTIN(__builtin_arm_dmb, "vUi", "nc")
 BUILTIN(__builtin_arm_dsb, "vUi", "nc")
 BUILTIN(__builtin_arm_isb, "vUi", "nc")
 
+BUILTIN(__builtin_arm_jcvt, "Zid", "nc")
+
 // Prefetch
 BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
 
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 6011ae17b78ea..a02530ad06756 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -118,6 +118,28 @@ void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts,
   getTargetDefinesARMV81A(Opts, Builder);
 }
 
+void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts,
+                                                MacroBuilder &Builder) const {
+  Builder.defineMacro("__ARM_FEATURE_JCVT", "1");
+  // Also include the Armv8.2 defines
+  getTargetDefinesARMV82A(Opts, Builder);
+}
+
+void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts,
+                                                MacroBuilder &Builder) const {
+  // Also include the Armv8.3 defines
+  // FIXME: Armv8.4 makes some extensions mandatory. Handle them here.
+  getTargetDefinesARMV83A(Opts, Builder);
+}
+
+void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts,
+                                                MacroBuilder &Builder) const {
+  // Also include the Armv8.4 defines
+  // FIXME: Armv8.5 makes some extensions mandatory. Handle them here.
+  getTargetDefinesARMV84A(Opts, Builder);
+}
+
+
 void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
                                          MacroBuilder &Builder) const {
   // Target identification.
@@ -209,6 +231,15 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   case llvm::AArch64::ArchKind::ARMV8_2A:
     getTargetDefinesARMV82A(Opts, Builder);
     break;
+  case llvm::AArch64::ArchKind::ARMV8_3A:
+    getTargetDefinesARMV83A(Opts, Builder);
+    break;
+  case llvm::AArch64::ArchKind::ARMV8_4A:
+    getTargetDefinesARMV84A(Opts, Builder);
+    break;
+  case llvm::AArch64::ArchKind::ARMV8_5A:
+    getTargetDefinesARMV85A(Opts, Builder);
+    break;
   }
 
   // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
@@ -256,6 +287,12 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       ArchKind = llvm::AArch64::ArchKind::ARMV8_1A;
     if (Feature == "+v8.2a")
       ArchKind = llvm::AArch64::ArchKind::ARMV8_2A;
+    if (Feature == "+v8.3a")
+      ArchKind = llvm::AArch64::ArchKind::ARMV8_3A;
+    if (Feature == "+v8.4a")
+      ArchKind = llvm::AArch64::ArchKind::ARMV8_4A;
+    if (Feature == "+v8.5a")
+      ArchKind = llvm::AArch64::ArchKind::ARMV8_5A;
     if (Feature == "+fullfp16")
       HasFullFP16 = 1;
     if (Feature == "+dotprod")
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 0241b585c4d13..de0aed78e037e 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -59,6 +59,12 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
                                MacroBuilder &Builder) const;
   void getTargetDefinesARMV82A(const LangOptions &Opts,
                                MacroBuilder &Builder) const;
+  void getTargetDefinesARMV83A(const LangOptions &Opts,
+                               MacroBuilder &Builder) const;
+  void getTargetDefinesARMV84A(const LangOptions &Opts,
+                               MacroBuilder &Builder) const;
+  void getTargetDefinesARMV85A(const LangOptions &Opts,
+                               MacroBuilder &Builder) const;
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override;
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c58d1018fa0ec..acaa81ae8a9a6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6977,6 +6977,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_jcvt) {
+    assert((getContext().getTypeSize(E->getType()) == 32) &&
+           "__jcvt of unusual size!");
+    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
+    return Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
+  }
+
   if (BuiltinID == AArch64::BI__clear_cache) {
     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
     const FunctionDecl *FD = E->getDirectCallee();
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 08d65fa0d0696..096cc261af2c6 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -597,6 +597,14 @@ __crc32cd(uint32_t __a, uint64_t __b) {
 }
 #endif
 
+/* Armv8.3-A Javascript conversion intrinsic */
+#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__jcvt(double __a) {
+  return __builtin_arm_jcvt(__a);
+}
+#endif
+
 /* 10.1 Special register intrinsics */
 #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
 #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index e8a744372de2b..beca937350678 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -2,6 +2,9 @@
 // RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 -O2  -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch32 -check-prefix=ARM-NEWPM -check-prefix=AArch32-NEWPM
 // RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O2 -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-LEGACY -check-prefix=AArch64-LEGACY
 // RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-NEWPM -check-prefix=AArch64-NEWPM
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.3a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.4a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3
+// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.5a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3
 
 #include <arm_acle.h>
 
@@ -823,3 +826,11 @@ void test_wsrp(void *v) {
 
 // AArch64: ![[M0]] = !{!"1:2:3:4:5"}
 // AArch64: ![[M1]] = !{!"sysreg"}
+
+// AArch64-v8_3-LABEL: @test_jcvt(
+// AArch64-v8_3: call i32 @llvm.aarch64.fjcvtzs
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_JCVT) // same guard as __jcvt in the header
+int32_t test_jcvt(double v) {
+  return __jcvt(v);
+}
+#endif
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index f164c2f6f3647..5ec63fba82b5b 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -58,6 +58,12 @@ void prefetch() {
 // CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 3, i32 0)
 }
 
+int32_t jcvt(double v) {
+  //CHECK-LABEL: @jcvt(
+  //CHECK: call i32 @llvm.aarch64.fjcvtzs
+  return __builtin_arm_jcvt(v);
+}
+
 __typeof__(__builtin_arm_rsr("1:2:3:4:5")) rsr(void);
 
 uint32_t rsr() {
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 720a7bdde2375..7616d6a90c1bc 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -31,6 +31,8 @@ def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
 def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
                                 LLVMMatchType<0>], [IntrNoMem]>;
 
+def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+
 //===----------------------------------------------------------------------===//
 // HINT
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 41497a6c4fbc0..897b3ebb3847f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -717,7 +717,9 @@ let Predicates = [HasPA] in {
 // v8.3a floating point conversion for javascript
 let Predicates = [HasJS, HasFPARMv8] in
 def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
-                                      "fjcvtzs", []> {
+                                      "fjcvtzs",
+                                      [(set GPR32:$Rd,
+                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
   let Inst{31} = 0;
 } // HasJS, HasFPARMv8
 
diff --git a/llvm/test/CodeGen/AArch64/fjcvtzs.ll b/llvm/test/CodeGen/AArch64/fjcvtzs.ll
new file mode 100644
index 0000000000000..017694dcd7b19
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fjcvtzs.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=arm64-eabi -mattr=+jsconv -o - %s | FileCheck %s
+
+define i32 @test_jcvt(double %v) {
+; CHECK-LABEL: test_jcvt:
+; CHECK: fjcvtzs w0, d0
+  %val = call i32 @llvm.aarch64.fjcvtzs(double %v)
+  ret i32 %val
+}
+
+declare i32 @llvm.aarch64.fjcvtzs(double)
diff --git a/llvm/utils/git-svn/git-llvm b/llvm/utils/git-svn/git-llvm
index 289898d15b5fb..13c49b5fdf0c6 100755
--- a/llvm/utils/git-svn/git-llvm
+++ b/llvm/utils/git-svn/git-llvm
@@ -372,7 +372,7 @@ def svn_push_one_rev(svn_repo, rev, git_to_svn_mapping, dry_run):
     # Now we're ready to commit.
     commit_msg = git('show', '--pretty=%B', '--quiet', rev)
     if not dry_run:
-        commit_args = ['commit', '-m', commit_msg]
+        commit_args = ['commit', '-m', commit_msg, '--username', 'ktkachov']
         if '--force-interactive' in svn(svn_repo, 'commit', '--help'):
             commit_args.append('--force-interactive')
         log(svn(svn_repo, *commit_args))

From 1781c28a0d3433b0608f504e45660f8511ba7742 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Tue, 16 Jul 2019 10:14:53 +0000
Subject: [PATCH 225/451] Remove username from git-llvm script, erroneously
 added in 366197

llvm-svn: 366198
---
 llvm/utils/git-svn/git-llvm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/git-svn/git-llvm b/llvm/utils/git-svn/git-llvm
index 13c49b5fdf0c6..289898d15b5fb 100755
--- a/llvm/utils/git-svn/git-llvm
+++ b/llvm/utils/git-svn/git-llvm
@@ -372,7 +372,7 @@ def svn_push_one_rev(svn_repo, rev, git_to_svn_mapping, dry_run):
     # Now we're ready to commit.
     commit_msg = git('show', '--pretty=%B', '--quiet', rev)
     if not dry_run:
-        commit_args = ['commit', '-m', commit_msg, '--username', 'ktkachov']
+        commit_args = ['commit', '-m', commit_msg]
         if '--force-interactive' in svn(svn_repo, 'commit', '--help'):
             commit_args.append('--force-interactive')
         log(svn(svn_repo, *commit_args))

From 06377ae2e585fd4df695f973cd8ee6b3f76bfe5f Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Tue, 16 Jul 2019 10:17:06 +0000
Subject: [PATCH 226/451] [clangd] Don't rebuild background index until we
 indexed one TU per thread.

Summary:
This increases the odds that the boosted file (cpp file matching header)
will be ready. (It always enqueues first, so it'll be present unless
another thread indexes *two* files before the first thread indexes one.)

Reviewers: kadircet

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64682

llvm-svn: 366199
---
 clang-tools-extra/clangd/index/Background.cpp      |  2 +-
 clang-tools-extra/clangd/index/BackgroundRebuild.h | 14 ++++++++------
 .../clangd/unittests/BackgroundIndexTests.cpp      | 10 +++++-----
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index 458e6fc355f82..23445e16b2f3f 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -127,7 +127,7 @@ BackgroundIndex::BackgroundIndex(
     BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize)
     : SwapIndex(llvm::make_unique<MemIndex>()), FSProvider(FSProvider),
       CDB(CDB), BackgroundContext(std::move(BackgroundContext)),
-      Rebuilder(this, &IndexedSymbols),
+      Rebuilder(this, &IndexedSymbols, ThreadPoolSize),
       IndexStorageFactory(std::move(IndexStorageFactory)),
       CommandsChanged(
           CDB.watch([&](const std::vector<std::string> &ChangedFiles) {
diff --git a/clang-tools-extra/clangd/index/BackgroundRebuild.h b/clang-tools-extra/clangd/index/BackgroundRebuild.h
index 5a6227e8baab3..f660957f62419 100644
--- a/clang-tools-extra/clangd/index/BackgroundRebuild.h
+++ b/clang-tools-extra/clangd/index/BackgroundRebuild.h
@@ -16,6 +16,7 @@
 
 #include "index/FileIndex.h"
 #include "index/Index.h"
+#include "llvm/Support/Threading.h"
 
 namespace clang {
 namespace clangd {
@@ -45,12 +46,9 @@ namespace clangd {
 // This class is exposed in the header so it can be tested.
 class BackgroundIndexRebuilder {
 public:
-  // Thresholds for rebuilding as TUs get indexed.
-  static constexpr unsigned TUsBeforeFirstBuild = 5;
-  static constexpr unsigned TUsBeforeRebuild = 100;
-
-  BackgroundIndexRebuilder(SwapIndex *Target, FileSymbols *Source)
-      : Target(Target), Source(Source) {}
+  BackgroundIndexRebuilder(SwapIndex *Target, FileSymbols *Source,
+                           unsigned Threads)
+      : TUsBeforeFirstBuild(Threads), Target(Target), Source(Source) {}
 
   // Called to indicate a TU has been indexed.
   // May rebuild, if enough TUs have been indexed.
@@ -71,6 +69,10 @@ class BackgroundIndexRebuilder {
   // Ensures we won't start any more rebuilds.
   void shutdown();
 
+  // Thresholds for rebuilding as TUs get indexed.
+  const unsigned TUsBeforeFirstBuild; // Typically one per worker thread.
+  const unsigned TUsBeforeRebuild = 100;
+
 private:
   // Run Check under the lock, and rebuild if it returns true.
   void maybeRebuild(const char *Reason, std::function<bool()> Check);
diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
index 15d064a995ca8..79e081bd67893 100644
--- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -575,7 +575,8 @@ TEST_F(BackgroundIndexTest, CmdLineHash) {
 class BackgroundIndexRebuilderTest : public testing::Test {
 protected:
   BackgroundIndexRebuilderTest()
-      : Target(llvm::make_unique<MemIndex>()), Rebuilder(&Target, &Source) {
+      : Target(llvm::make_unique<MemIndex>()),
+        Rebuilder(&Target, &Source, /*Threads=*/10) {
     // Prepare FileSymbols with TestSymbol in it, for checkRebuild.
     TestSymbol.ID = SymbolID("foo");
   }
@@ -610,11 +611,10 @@ class BackgroundIndexRebuilderTest : public testing::Test {
 };
 
 TEST_F(BackgroundIndexRebuilderTest, IndexingTUs) {
-  for (unsigned I = 0; I < BackgroundIndexRebuilder::TUsBeforeFirstBuild - 1;
-       ++I)
+  for (unsigned I = 0; I < Rebuilder.TUsBeforeFirstBuild - 1; ++I)
     EXPECT_FALSE(checkRebuild([&] { Rebuilder.indexedTU(); }));
   EXPECT_TRUE(checkRebuild([&] { Rebuilder.indexedTU(); }));
-  for (unsigned I = 0; I < BackgroundIndexRebuilder::TUsBeforeRebuild - 1; ++I)
+  for (unsigned I = 0; I < Rebuilder.TUsBeforeRebuild - 1; ++I)
     EXPECT_FALSE(checkRebuild([&] { Rebuilder.indexedTU(); }));
   EXPECT_TRUE(checkRebuild([&] { Rebuilder.indexedTU(); }));
 }
@@ -640,7 +640,7 @@ TEST_F(BackgroundIndexRebuilderTest, LoadingShards) {
 
   // No rebuilding for indexed files while loading.
   Rebuilder.startLoading();
-  for (unsigned I = 0; I < 3 * BackgroundIndexRebuilder::TUsBeforeRebuild; ++I)
+  for (unsigned I = 0; I < 3 * Rebuilder.TUsBeforeRebuild; ++I)
     EXPECT_FALSE(checkRebuild([&] { Rebuilder.indexedTU(); }));
   // But they get indexed when we're done, even if no shards were loaded.
   EXPECT_TRUE(checkRebuild([&] { Rebuilder.doneLoading(); }));

From 0afffab0d1ea1a8eeccb2d32b976bbecacd4178a Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Tue, 16 Jul 2019 10:30:21 +0000
Subject: [PATCH 227/451] [SemaTemplate] Fix uncorrected typos after pack
 expansion

Summary:
This case is particularly important for clangd, as it is triggered after
inserting the snippet for variadic functions.

Reviewers: kadircet, ilya-biryukov

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64677

llvm-svn: 366200
---
 clang/lib/Sema/SemaTemplateVariadic.cpp   | 1 +
 clang/test/SemaTemplate/typo-variadic.cpp | 2 ++
 2 files changed, 3 insertions(+)
 create mode 100644 clang/test/SemaTemplate/typo-variadic.cpp

diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp
index 9b23624a9a81f..d97626551a41f 100644
--- a/clang/lib/Sema/SemaTemplateVariadic.cpp
+++ b/clang/lib/Sema/SemaTemplateVariadic.cpp
@@ -619,6 +619,7 @@ ExprResult Sema::CheckPackExpansion(Expr *Pattern, SourceLocation EllipsisLoc,
   if (!Pattern->containsUnexpandedParameterPack()) {
     Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs)
     << Pattern->getSourceRange();
+    CorrectDelayedTyposInExpr(Pattern);
     return ExprError();
   }
 
diff --git a/clang/test/SemaTemplate/typo-variadic.cpp b/clang/test/SemaTemplate/typo-variadic.cpp
new file mode 100644
index 0000000000000..c9b777aebbe91
--- /dev/null
+++ b/clang/test/SemaTemplate/typo-variadic.cpp
@@ -0,0 +1,2 @@
+// RUN: %clang_cc1 -fsyntax-only %s -verify
+int x = m(s...); // expected-error{{pack expansion does not}} expected-error{{undeclared identifier}}

From 971ac4ca2cb8172a1c8f59c7703680b41aa7a5f5 Mon Sep 17 00:00:00 2001
From: Owen Reynolds <gbreynoo@gmail.com>
Date: Tue, 16 Jul 2019 11:02:11 +0000
Subject: [PATCH 228/451] Reapply [llvm-ar][test] Add to MRI test coverage

This reapplies 363232 without mri-utf8.test due to failing on Darwin.

Differential Revision: https://reviews.llvm.org/D63197

llvm-svn: 366201
---
 llvm/test/tools/llvm-ar/mri-addlib.test   | 45 +++++++++++++++++++++
 llvm/test/tools/llvm-ar/mri-addmod.test   | 27 +++++++++++++
 llvm/test/tools/llvm-ar/mri-comments.test | 19 +++++++++
 llvm/test/tools/llvm-ar/mri-end.test      | 48 +++++++++++++++++++++++
 4 files changed, 139 insertions(+)
 create mode 100644 llvm/test/tools/llvm-ar/mri-addlib.test
 create mode 100644 llvm/test/tools/llvm-ar/mri-addmod.test
 create mode 100644 llvm/test/tools/llvm-ar/mri-comments.test
 create mode 100644 llvm/test/tools/llvm-ar/mri-end.test

diff --git a/llvm/test/tools/llvm-ar/mri-addlib.test b/llvm/test/tools/llvm-ar/mri-addlib.test
new file mode 100644
index 0000000000000..c297653e2abb1
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/mri-addlib.test
@@ -0,0 +1,45 @@
+## Test the ADDLIB MRI command.
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: yaml2obj %s -o %t/f.o
+# RUN: llvm-ar r %t/f.a %t/f.o
+
+## Merge contents of archives.
+# RUN: echo "CREATE %t/addlib.a" > %t/addlib.mri
+# RUN: echo "ADDLIB %t/f.a" >> %t/addlib.mri
+# RUN: echo "SAVE" >> %t/addlib.mri
+# RUN: llvm-ar -M < %t/addlib.mri
+# RUN: llvm-nm --print-armap %t/addlib.a | FileCheck --check-prefix=SYMS %s
+# RUN: llvm-ar t %t/addlib.a | FileCheck --check-prefix=FILES %s
+
+# SYMS: f in {{.*}}
+# FILES: f.o
+
+## ADDLIB with non-archive file.
+# RUN: echo "CREATE %t/badlib.a" > %t/badlib.mri
+# RUN: echo "ADDLIB %s" >> %t/badlib.mri
+# RUN: echo "SAVE" >> %t/badlib.mri
+# RUN: not llvm-ar -M < %t/badlib.mri 2>&1 | FileCheck --check-prefix=PARSE %s
+# RUN: not ls %t/badlib.a
+
+# PARSE: Could not parse library
+
+## No create command.
+# RUN: echo "ADDLIB %t/f.a"  > %t/nocreate.mri
+# RUN: echo "SAVE" >> %t/nocreate.mri
+# RUN: not llvm-ar -M < %t/nocreate.mri
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+    - Name:    f
+      Binding: STB_GLOBAL
+      Section: .text
+...
diff --git a/llvm/test/tools/llvm-ar/mri-addmod.test b/llvm/test/tools/llvm-ar/mri-addmod.test
new file mode 100644
index 0000000000000..2b6e4dc3adbee
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/mri-addmod.test
@@ -0,0 +1,27 @@
+## Test the ADDMOD MRI command.
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: yaml2obj %s -o %t/f.o
+
+# RUN: echo "CREATE %t/addmod.a" > %t/addmod.mri
+# RUN: echo "ADDMOD %t/f.o" >> %t/addmod.mri
+# RUN: echo "SAVE" >> %t/addmod.mri
+# RUN: llvm-ar -M < %t/addmod.mri
+# RUN: llvm-nm --print-armap %t/addmod.a | FileCheck %s
+
+# CHECK: f in f.o
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+    - Name:    f
+      Binding: STB_GLOBAL
+      Section: .text
+...
diff --git a/llvm/test/tools/llvm-ar/mri-comments.test b/llvm/test/tools/llvm-ar/mri-comments.test
new file mode 100644
index 0000000000000..f5fa61768aeb0
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/mri-comments.test
@@ -0,0 +1,19 @@
+# Test different MRI comment formats and white space.
+
+RUN: rm -rf %t && mkdir -p %t
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
+
+RUN: echo "create %t/mri.ar;comment" > %t/script.mri
+RUN: echo "addmod %t/elf.o * comment" >> %t/script.mri
+RUN: echo "; comment" >> %t/script.mri
+RUN: echo " ;comment" >> %t/script.mri
+RUN: echo "* comment" >> %t/script.mri
+RUN: echo " *comment" >> %t/script.mri
+RUN: echo "" >> %t/script.mri
+RUN: echo " " >> %t/script.mri
+RUN: echo " save" >> %t/script.mri
+
+RUN: llvm-ar -M < %t/script.mri
+RUN: llvm-ar t %t/mri.ar | FileCheck %s
+
+CHECK: elf.o
diff --git a/llvm/test/tools/llvm-ar/mri-end.test b/llvm/test/tools/llvm-ar/mri-end.test
new file mode 100644
index 0000000000000..db4abce7d180e
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/mri-end.test
@@ -0,0 +1,48 @@
+# The END MRI command is optional. Scripts that omit
+# or include END should be handled by llvm-ar.
+RUN: rm -rf %t && mkdir -p %t
+
+# Empty File
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
+
+RUN: touch %t/empty.mri
+RUN: llvm-ar -M < %t/empty.mri
+
+RUN: echo "END" > %t/empty-end.mri
+RUN: llvm-ar -M < %t/empty-end.mri
+
+# Comment only
+RUN: echo "; a comment" > %t/comment.mri
+RUN: llvm-ar -M < %t/comment.mri
+
+RUN: echo "; a comment" > %t/comment-end.mri
+RUN: echo "END" >> %t/comment-end.mri
+RUN: llvm-ar -M < %t/comment-end.mri
+
+# Without Save
+RUN: echo "create %t/mri.ar" > %t/no-save.mri
+RUN: echo "addmod %t/elf.o" >> %t/no-save.mri
+RUN: llvm-ar -M < %t/no-save.mri
+RUN: test ! -e %t/mri.ar
+
+RUN: echo "create %t/mri.ar" > %t/no-save-end.mri
+RUN: echo "addmod %t/elf.o" >> %t/no-save-end.mri
+RUN: echo "END" >> %t/no-save-end.mri
+RUN: llvm-ar -M < %t/no-save-end.mri
+RUN: test ! -e %t/mri.ar
+
+# With Save
+RUN: echo "create %t/mri.ar" > %t/save.mri
+RUN: echo "addmod %t/elf.o" >> %t/save.mri
+RUN: echo "save" >> %t/save.mri
+RUN: llvm-ar -M < %t/save.mri
+RUN: llvm-ar t %t/mri.ar | FileCheck %s
+
+RUN: echo "create %t/mri.ar" > %t/save-end.mri
+RUN: echo "addmod %t/elf.o" >> %t/save-end.mri
+RUN: echo "save" >> %t/save-end.mri
+RUN: echo "END" >> %t/save-end.mri
+RUN: llvm-ar -M < %t/save-end.mri
+RUN: llvm-ar t %t/mri.ar | FileCheck %s
+
+CHECK: elf.o

From a5dc9c98352c396c0114403486fbd47e092d084a Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Tue, 16 Jul 2019 11:06:43 +0000
Subject: [PATCH 229/451] [Driver] Don't pass --dynamic-linker to ld on Solaris

I noticed that clang currently passes --dynamic-linker to ld.  This has been the case
since Solaris 11 support was added initially back in 2012 by David Chisnall (r150580).
I couldn't find any patch submission, let alone a justification, for this, and it seems
completely useless: --dynamic-linker is a gld compatibility form of the option, the
native option being -I.  First of all, however, the dynamic linker passed is simply the
default, so there's no reason at all to specify it in the first place.

This patch removes passing the option and adjusts the affected testcase accordingly.

Tested on x86_64-pc-solaris2.11 and sparcv9-sun-solaris2.11.

Differential Revision: https://reviews.llvm.org/D64493

llvm-svn: 366202
---
 clang/lib/Driver/ToolChains/Solaris.cpp                       | 4 ----
 clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1   | 0
 .../Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1  | 0
 .../test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1 | 0
 clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1     | 0
 clang/test/Driver/solaris-ld.c                                | 4 ----
 6 files changed, 8 deletions(-)
 delete mode 100644 clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1
 delete mode 100644 clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1
 delete mode 100644 clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1
 delete mode 100644 clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1

diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp
index c65b783739598..38f24d4cf7e74 100644
--- a/clang/lib/Driver/ToolChains/Solaris.cpp
+++ b/clang/lib/Driver/ToolChains/Solaris.cpp
@@ -65,10 +65,6 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-Bdynamic");
     if (Args.hasArg(options::OPT_shared)) {
       CmdArgs.push_back("-shared");
-    } else {
-      CmdArgs.push_back("--dynamic-linker");
-      CmdArgs.push_back(
-          Args.MakeArgString(getToolChain().GetFilePath("ld.so.1")));
     }
 
     // libpthread has been folded into libc since Solaris 10, no need to do
diff --git a/clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1 b/clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1 b/clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1 b/clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1 b/clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/clang/test/Driver/solaris-ld.c b/clang/test/Driver/solaris-ld.c
index 2fc5c91272aa0..59d03c4d82253 100644
--- a/clang/test/Driver/solaris-ld.c
+++ b/clang/test/Driver/solaris-ld.c
@@ -11,7 +11,6 @@
 // CHECK-LD-SPARC32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "sparc-sun-solaris2.11"
 // CHECK-LD-SPARC32-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-LD-SPARC32: "{{.*}}ld{{(.exe)?}}"
-// CHECK-LD-SPARC32-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib{{/|\\\\}}ld.so.1"
 // CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crt1.o"
 // CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crti.o"
 // CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crtbegin.o"
@@ -35,7 +34,6 @@
 // CHECK-LD-SPARC64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "sparcv9-sun-solaris2.11"
 // CHECK-LD-SPARC64-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-LD-SPARC64: "{{.*}}ld{{(.exe)?}}"
-// CHECK-LD-SPARC64-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib/sparcv9{{/|\\\\}}ld.so.1"
 // CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9{{/|\\\\}}crt1.o"
 // CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/lib/sparcv9{{/|\\\\}}crti.o"
 // CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9{{/|\\\\}}crtbegin.o"
@@ -59,7 +57,6 @@
 // CHECK-LD-X32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "i386-pc-solaris2.11"
 // CHECK-LD-X32-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-LD-X32: "{{.*}}ld{{(.exe)?}}"
-// CHECK-LD-X32-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib{{/|\\\\}}ld.so.1"
 // CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt1.o"
 // CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crti.o"
 // CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4{{/|\\\\}}crtbegin.o"
@@ -83,7 +80,6 @@
 // CHECK-LD-X64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "x86_64-pc-solaris2.11"
 // CHECK-LD-X64-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
 // CHECK-LD-X64: "{{.*}}ld{{(.exe)?}}"
-// CHECK-LD-X64-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}ld.so.1"
 // CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}crt1.o"
 // CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}crti.o"
 // CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64{{/|\\\\}}crtbegin.o"

From a1370877d7f4621a4243f0ae1c22dbbfd6483f35 Mon Sep 17 00:00:00 2001
From: George Rimar <grimar@accesssoftek.com>
Date: Tue, 16 Jul 2019 11:07:30 +0000
Subject: [PATCH 230/451] [Object/llvm-readelf/llvm-readobj] - Improve error
 reporting when e_shstrndx is broken.

When e_shstrndx is broken, it is impossible to get a section name.
In this patch I improved the error message we show and
added tests for Object and for llvm-readelf/llvm-readobj

Message was changed in two places:
1) llvm-readelf/llvm-readobj previously used a code from Object/ELF.h,
now they have a modified version of it (it has less checks and allows
dumping broken things).
2) Code in Object/ELF.h is still used for generic cases.

Differential revision: https://reviews.llvm.org/D64714

llvm-svn: 366203
---
 llvm/include/llvm/Object/ELF.h                |  5 ++--
 llvm/test/Object/invalid.test                 | 16 ++++++++++++
 .../llvm-readobj/elf-invalid-shstrndx.test    | 26 +++++++++++++++++++
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 16 +++++++++---
 llvm/tools/llvm-readobj/llvm-readobj.cpp      | 13 +++++-----
 llvm/tools/llvm-readobj/llvm-readobj.h        |  1 +
 6 files changed, 65 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test

diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 7bc6dc4620c7d..cf8e4529bad96 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -466,9 +466,10 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections) const {
 
   if (!Index) // no section string table.
     return "";
+  // TODO: Test a case when the sh_link of the section with index 0 is broken.
   if (Index >= Sections.size())
-    // TODO: this error is untested.
-    return createError("invalid section index");
+    return createError("section header string table index " + Twine(Index) +
+                       " does not exist");
   return getStringTable(&Sections[Index]);
 }
 
diff --git a/llvm/test/Object/invalid.test b/llvm/test/Object/invalid.test
index 97ebb9f857393..5723c4aef3387 100644
--- a/llvm/test/Object/invalid.test
+++ b/llvm/test/Object/invalid.test
@@ -536,3 +536,19 @@ ProgramHeaders:
     FileSize: 0xffff0000
     Sections:
       - Section: .dynamic
+
+# RUN: yaml2obj --docnum=25 %s -o %t25
+# RUN: not obj2yaml 2>&1 %t25 | FileCheck %s -DFILE=%t25 --check-prefix=INVALID-SHSTRNDX
+
+# INVALID-SHSTRNDX: Error reading file: [[FILE]]: section header string table index 255 does not exist
+
+--- !ELF
+FileHeader:
+  Class:    ELFCLASS64
+  Data:     ELFDATA2LSB
+  Type:     ET_REL
+  Machine:  EM_X86_64
+  SHStrNdx: 0xFF
+Sections:
+  - Name: .foo
+    Type: SHT_PROGBITS
diff --git a/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test b/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test
new file mode 100644
index 0000000000000..6dc3b11dfd283
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test
@@ -0,0 +1,26 @@
+# RUN: yaml2obj %s -o %t
+# RUN: not llvm-readelf --headers -S 2>&1 %t | FileCheck %s -DFILE=%t --check-prefix=GNU
+# RUN: not llvm-readobj --headers -S 2>&1 %t | FileCheck %s -DFILE=%t --check-prefix=LLVM
+
+# GNU:      ELF Header:
+# GNU:        Section header string table index: 255
+# GNU-NEXT:   There are 4 section headers, starting at offset 0x40:
+# GNU:      Section Headers:
+# GNU-NEXT:  [Nr] Name
+# GNU-EMPTY:
+# GNU-NEXT:  error: '[[FILE]]': section header string table index 255 does not exist
+
+# LLVM:      ElfHeader {
+# LLVM:        StringTableSectionIndex: 255
+# LLVM-NEXT: }
+# LLVM-NEXT: Sections [
+# LLVM-EMPTY:
+# LLVM-NEXT: error: '[[FILE]]': section header string table index 255 does not exist
+
+--- !ELF
+FileHeader:
+  Class:     ELFCLASS64
+  Data:      ELFDATA2LSB
+  Type:      ET_REL
+  Machine:   EM_X86_64
+  SHStrNdx:  0xFF
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 589199c009f63..4e1cb7d544e79 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -183,6 +183,8 @@ template <typename ELFT> class ELFDumper : public ObjDumper {
 
   void printELFLinkerOptions() override;
 
+  const object::ELFObjectFile<ELFT> *getElfObject() const { return ObjF; };
+
 private:
   std::unique_ptr<DumpStyle<ELFT>> ELFDumperStyle;
 
@@ -3009,15 +3011,19 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) {
 
 template <class ELFT>
 static StringRef getSectionName(const typename ELFT::Shdr &Sec,
-                                const ELFFile<ELFT> &Obj,
+                                const ELFObjectFile<ELFT> &ElfObj,
                                 ArrayRef<typename ELFT::Shdr> Sections) {
+  const ELFFile<ELFT> &Obj = *ElfObj.getELFFile();
   uint32_t Index = Obj.getHeader()->e_shstrndx;
   if (Index == ELF::SHN_XINDEX)
     Index = Sections[0].sh_link;
   if (!Index) // no section string table.
     return "";
+  // TODO: Test a case when the sh_link of the section with index 0 is broken.
   if (Index >= Sections.size())
-    reportError("invalid section index");
+    reportError(ElfObj.getFileName(),
+                createError("section header string table index " +
+                            Twine(Index) + " does not exist"));
   StringRef Data = toStringRef(unwrapOrError(
       Obj.template getSectionContentsAsArray<uint8_t>(&Sections[Index])));
   return unwrapOrError(Obj.getSectionName(&Sec, Data));
@@ -3040,10 +3046,11 @@ void GNUStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
     printField(F);
   OS << "\n";
 
+  const ELFObjectFile<ELFT> *ElfObj = this->dumper()->getElfObject();
   size_t SectionIndex = 0;
   for (const Elf_Shdr &Sec : Sections) {
     Fields[0].Str = to_string(SectionIndex);
-    Fields[1].Str = getSectionName(Sec, *Obj, Sections);
+    Fields[1].Str = getSectionName(Sec, *ElfObj, Sections);
     Fields[2].Str =
         getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type);
     Fields[3].Str =
@@ -4590,8 +4597,9 @@ void LLVMStyle<ELFT>::printSectionHeaders(const ELFO *Obj) {
 
   int SectionIndex = -1;
   ArrayRef<Elf_Shdr> Sections = unwrapOrError(Obj->sections());
+  const ELFObjectFile<ELFT> *ElfObj = this->dumper()->getElfObject();
   for (const Elf_Shdr &Sec : Sections) {
-    StringRef Name = getSectionName(Sec, *Obj, Sections);
+    StringRef Name = getSectionName(Sec, *ElfObj, Sections);
     DictScope SectionD(W, "Section");
     W.printNumber("Index", ++SectionIndex);
     W.printNumber("Name", Name, Sec.sh_name);
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp
index b6d0493af700d..1bd5bb74bf290 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -371,11 +371,18 @@ namespace opts {
 namespace llvm {
 
 LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) {
+  fouts().flush();
   errs() << "\n";
   WithColor::error(errs()) << Msg << "\n";
   exit(1);
 }
 
+void reportError(StringRef Input, Error Err) {
+  if (Input == "-")
+    Input = "<stdin>";
+  error(createFileError(Input, std::move(Err)));
+}
+
 void reportWarning(Twine Msg) {
   fouts().flush();
   errs() << "\n";
@@ -403,12 +410,6 @@ void error(std::error_code EC) {
 
 } // namespace llvm
 
-static void reportError(StringRef Input, Error Err) {
-  if (Input == "-")
-    Input = "<stdin>";
-  error(createFileError(Input, std::move(Err)));
-}
-
 static void reportError(StringRef Input, std::error_code EC) {
   reportError(Input, errorCodeToError(EC));
 }
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.h b/llvm/tools/llvm-readobj/llvm-readobj.h
index ac8ced6121f49..0e02da4cb847a 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.h
+++ b/llvm/tools/llvm-readobj/llvm-readobj.h
@@ -22,6 +22,7 @@ namespace llvm {
 
   // Various helper functions.
   LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg);
+  void reportError(StringRef Input, Error Err); 
   void reportWarning(Twine Msg);
   void warn(llvm::Error Err);
   void error(std::error_code EC);

From eea828054d37ac2c7a025384c06eb603e5c0e189 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Tue, 16 Jul 2019 11:59:17 +0000
Subject: [PATCH 231/451] Document the LLVM_ENABLE_BINDINGS option

llvm-svn: 366204
---
 llvm/docs/CMake.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index 507a85d09526d..f9b384458db38 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -598,6 +598,12 @@ LLVM-specific variables
 **LLVM_USE_NEWPM**:BOOL
   If enabled, use the experimental new pass manager.
 
+**LLVM_USE_NEWPM**:BOOL
+  If enabled, use the experimental new pass manager.
+
+**LLVM_ENABLE_BINDINGS**:BOOL
+  If disabled, do not try to build the OCaml and go bindings.
+
 CMake Caches
 ============
 

From 116e58e2929dd32fa7e1e02aa8519085644ccf38 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Tue, 16 Jul 2019 12:05:54 +0000
Subject: [PATCH 232/451] remove a duplicate declaration

llvm-svn: 366205
---
 llvm/docs/CMake.rst | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index f9b384458db38..91fb5282206f2 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -598,9 +598,6 @@ LLVM-specific variables
 **LLVM_USE_NEWPM**:BOOL
   If enabled, use the experimental new pass manager.
 
-**LLVM_USE_NEWPM**:BOOL
-  If enabled, use the experimental new pass manager.
-
 **LLVM_ENABLE_BINDINGS**:BOOL
   If disabled, do not try to build the OCaml and go bindings.
 

From 3a6aaa4391f058201d8cec737ab2e6903ef65892 Mon Sep 17 00:00:00 2001
From: Owen Reynolds <gbreynoo@gmail.com>
Date: Tue, 16 Jul 2019 12:53:59 +0000
Subject: [PATCH 233/451] Reapply [llvm-ar][test] Increase llvm-ar test
 coverage

This reapplies 365316 without extract.test due to failing on Darwin.

Differential Revision: https://reviews.llvm.org/D63935

llvm-svn: 366206
---
 llvm/test/tools/llvm-ar/create.test           | 14 ++++
 .../tools/llvm-ar/dash-before-letter.test     | 12 ++++
 llvm/test/tools/llvm-ar/default-add.test      | 22 ++----
 llvm/test/tools/llvm-ar/delete.test           | 67 ++++++++++++++++++
 llvm/test/tools/llvm-ar/insert-after.test     | 59 ++++++++++++++++
 llvm/test/tools/llvm-ar/insert-before.test    | 59 ++++++++++++++++
 llvm/test/tools/llvm-ar/move-after.test       | 59 ++++++++++++++++
 llvm/test/tools/llvm-ar/move-before.test      | 69 ++++++++++++++++++
 llvm/test/tools/llvm-ar/symtab.test           | 70 +++++++++++++++++++
 9 files changed, 413 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/tools/llvm-ar/create.test
 create mode 100644 llvm/test/tools/llvm-ar/dash-before-letter.test
 create mode 100644 llvm/test/tools/llvm-ar/delete.test
 create mode 100644 llvm/test/tools/llvm-ar/insert-after.test
 create mode 100644 llvm/test/tools/llvm-ar/insert-before.test
 create mode 100644 llvm/test/tools/llvm-ar/move-after.test
 create mode 100644 llvm/test/tools/llvm-ar/move-before.test
 create mode 100644 llvm/test/tools/llvm-ar/symtab.test

diff --git a/llvm/test/tools/llvm-ar/create.test b/llvm/test/tools/llvm-ar/create.test
new file mode 100644
index 0000000000000..7e4951da5fa2b
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/create.test
@@ -0,0 +1,14 @@
+## Test the creation warning and suppression of that warning.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+
+RUN: rm -f %t.warning.ar
+RUN: llvm-ar r %t.warning.ar %t1.txt %t2.txt 2>&1 \
+RUN:   | FileCheck %s -DOUTPUT=%t.warning.ar
+
+CHECK: warning: creating [[OUTPUT]]
+
+RUN: rm -f %t.supressed.ar
+RUN: llvm-ar cr %t.supressed.ar %t1.txt %t2.txt 2>&1 \
+RUN:   | FileCheck --allow-empty /dev/null --implicit-check-not={{.}}
diff --git a/llvm/test/tools/llvm-ar/dash-before-letter.test b/llvm/test/tools/llvm-ar/dash-before-letter.test
new file mode 100644
index 0000000000000..3002a6de7ef90
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/dash-before-letter.test
@@ -0,0 +1,12 @@
+# Test the use of dash before key letters.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+
+RUN: rm -f %t.ar
+RUN: llvm-ar s -cr %t.ar %t1.txt
+RUN: llvm-ar -r -s %t.ar %t2.txt -s
+RUN: llvm-ar -t %t.ar | FileCheck %s
+
+CHECK:      1.txt
+CHECK-NEXT: 2.txt
diff --git a/llvm/test/tools/llvm-ar/default-add.test b/llvm/test/tools/llvm-ar/default-add.test
index 460965ac30582..e1ed69669442a 100644
--- a/llvm/test/tools/llvm-ar/default-add.test
+++ b/llvm/test/tools/llvm-ar/default-add.test
@@ -1,34 +1,20 @@
 RUN: yaml2obj %S/Inputs/macho.yaml -o %t-macho.o
 RUN: yaml2obj %S/Inputs/coff.yaml -o %t-coff.o
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t-elf.o
 
 RUN: rm -f %t.ar
 RUN: llvm-ar crs %t.ar %t-macho.o
 RUN: grep -q __.SYMDEF %t.ar
-Test that an option string prefixed by a dash works.
-RUN: llvm-ar -crs %t.ar %t-coff.o
-RUN: grep -q __.SYMDEF %t.ar
 
 RUN: rm -f %t.ar
 RUN: llvm-ar crs %t.ar %t-coff.o
 RUN: not grep -q __.SYMDEF %t.ar
 RUN: llvm-ar crs %t.ar %t-macho.o
 RUN: not grep -q __.SYMDEF %t.ar
-
-RUN: rm -f %t.ar
-Test that multiple dashed options works.
-RUN: llvm-ar -c -r -s %t.ar %t-macho.o
-RUN: grep -q __.SYMDEF %t.ar
-Test with duplicated options.
-RUN: llvm-ar -c -r -s -c -s %t.ar %t-coff.o
-RUN: grep -q __.SYMDEF %t.ar
+RUN: llvm-ar crs %t.ar %t-elf.o
+RUN: not grep -q __.SYMDEF %t.ar
 
 RUN: rm -f %t.ar
 Test with the options in a different order.
 RUN: llvm-ar rsc %t.ar %t-macho.o
-RUN: grep -q __.SYMDEF %t.ar
-Test with options everywhere.
-RUN: llvm-ar rsc -cs -sc %t.ar %t-coff.o -cs -sc
-RUN: grep -q __.SYMDEF %t.ar
-
-Ensure that we select the existing format when updating.
-
+RUN: grep -q __.SYMDEF %t.ar
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-ar/delete.test b/llvm/test/tools/llvm-ar/delete.test
new file mode 100644
index 0000000000000..d5ab797664173
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/delete.test
@@ -0,0 +1,67 @@
+## Test the deletion of members and that symbols are removed from the symbol table.
+
+# RUN: yaml2obj %s -o %t-delete.o --docnum=1
+# RUN: yaml2obj %s -o %t-keep.o --docnum=2
+# RUN: touch %t1.txt
+# RUN: touch %t2.txt
+
+## Add file:
+# RUN: rm -f %t.a
+# RUN: llvm-ar rc %t.a %t1.txt %t-delete.o %t-keep.o %t2.txt
+# RUN: llvm-nm --print-armap %t.a \
+# RUN:   | FileCheck %s --check-prefix=SYMBOL-ADDED
+# RUN: llvm-ar t %t.a | FileCheck %s --check-prefix=FILE-ADDED
+
+# SYMBOL-ADDED:      symbol1
+# SYMBOL-ADDED-NEXT: symbol2
+
+# FILE-ADDED:      1.txt
+# FILE-ADDED-NEXT: delete.o
+# FILE-ADDED-NEXT: keep.o
+# FILE-ADDED-NEXT: 2.txt
+
+## Delete file that is not a member:
+# RUN: cp %t.a %t-archive-copy.a
+# RUN: llvm-ar d %t.a t/missing.o
+# RUN: cmp %t.a %t-archive-copy.a
+
+## Delete file:
+# RUN: llvm-ar d %t.a %t-delete.o
+# RUN: llvm-nm --print-armap %t.a \
+# RUN:   | FileCheck %s --check-prefix=SYMBOL-DELETED --implicit-check-not symbol1
+# RUN: llvm-ar t %t.a \
+# RUN:   | FileCheck %s --check-prefix=FILE-DELETED --implicit-check-not delete.o
+
+# SYMBOL-DELETED: symbol2
+
+# FILE-DELETED:      1.txt
+# FILE-DELETED-NEXT: keep.o
+# FILE-DELETED-NEXT: 2.txt
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/insert-after.test b/llvm/test/tools/llvm-ar/insert-after.test
new file mode 100644
index 0000000000000..cd8ee9409c6b4
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/insert-after.test
@@ -0,0 +1,59 @@
+## Test inserting files after a file.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+RUN: touch %t3.txt
+RUN: touch %t4.txt
+
+# Insert one file:
+RUN: rm -f %t-one.a
+RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt
+RUN: llvm-ar ra %t1.txt %t-one.a %t3.txt
+RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
+
+ONE:      1.txt
+ONE-NEXT: 3.txt
+ONE-NEXT: 2.txt
+
+# Insert file at back:
+RUN: rm -f %t-back.a
+RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt
+RUN: llvm-ar ra %t2.txt %t-back.a %t3.txt
+RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK
+
+BACK:      1.txt
+BACK-NEXT: 2.txt
+BACK-NEXT: 3.txt
+
+# Insert multiple files:
+RUN: rm -f %t-multiple.a
+RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt
+RUN: llvm-ar ra %t1.txt %t-multiple.a %t4.txt %t3.txt
+RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
+
+MULTIPLE:      1.txt
+MULTIPLE-NEXT: 4.txt
+MULTIPLE-NEXT: 3.txt
+MULTIPLE-NEXT: 2.txt
+
+# Insert after invalid file:
+RUN: rm -f %t-invalid.a
+RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
+RUN: not llvm-ar ra invalid.txt %t-invalid.a %t2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=ERROR
+RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
+
+ERROR: error: Insertion point not found.
+INVALID:      1.txt
+INVALID-NEXT: 2.txt
+INVALID-NEXT: 3.txt
+
+# Insert file at the same position:
+RUN: rm -f %t-position.a
+RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar ra %t1.txt %t-position.a %t2.txt
+RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
+
+POSITION:      1.txt
+POSITION-NEXT: 2.txt
+POSITION-NEXT: 3.txt
diff --git a/llvm/test/tools/llvm-ar/insert-before.test b/llvm/test/tools/llvm-ar/insert-before.test
new file mode 100644
index 0000000000000..61505d8162aad
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/insert-before.test
@@ -0,0 +1,59 @@
+## Test inserting files before a file.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+RUN: touch %t3.txt
+RUN: touch %t4.txt
+
+# Insert one file:
+RUN: rm -f %t-one.a
+RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt
+RUN: llvm-ar rb %t2.txt %t-one.a %t3.txt
+RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
+
+ONE:      1.txt
+ONE-NEXT: 3.txt
+ONE-NEXT: 2.txt
+
+# Insert file at front:
+RUN: rm -f %t-front.a
+RUN: llvm-ar rc %t-front.a %t1.txt %t2.txt
+RUN: llvm-ar rb %t1.txt %t-front.a %t3.txt
+RUN: llvm-ar t %t-front.a | FileCheck %s --check-prefix=FRONT
+
+FRONT:      3.txt
+FRONT-NEXT: 1.txt
+FRONT-NEXT: 2.txt
+
+# Insert multiple files:
+RUN: rm -f %t-multiple.a
+RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt
+RUN: llvm-ar rb %t2.txt %t-multiple.a %t4.txt %t3.txt
+RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
+
+MULTIPLE:      1.txt
+MULTIPLE-NEXT: 4.txt
+MULTIPLE-NEXT: 3.txt
+MULTIPLE-NEXT: 2.txt
+
+# Insert before an invalid file:
+RUN: rm -f %t-invalid.a
+RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
+RUN: not llvm-ar rb invalid.txt %t-invalid.a %t2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=ERROR
+RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
+
+ERROR: error: Insertion point not found.
+INVALID:      1.txt
+INVALID-NEXT: 2.txt
+INVALID-NEXT: 3.txt
+
+# Insert file at the same position:
+RUN: rm -f %t-position.a
+RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar rb %t3.txt %t-position.a %t2.txt
+RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
+
+POSITION:      1.txt
+POSITION-NEXT: 2.txt
+POSITION-NEXT: 3.txt
diff --git a/llvm/test/tools/llvm-ar/move-after.test b/llvm/test/tools/llvm-ar/move-after.test
new file mode 100644
index 0000000000000..fb61f2aa118f6
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/move-after.test
@@ -0,0 +1,59 @@
+## Test moving files after a file.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+RUN: touch %t3.txt
+RUN: touch %t4.txt
+
+# Move one file:
+RUN: rm -f %t-one.a
+RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar ma %t1.txt %t-one.a %t3.txt
+RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
+
+ONE:      1.txt
+ONE-NEXT: 3.txt
+ONE-NEXT: 2.txt
+
+# Move file to back:
+RUN: rm -f %t-back.a
+RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar ma %t2.txt %t-back.a %t1.txt
+RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK
+
+BACK:      2.txt
+BACK-NEXT: 1.txt
+BACK-NEXT: 3.txt
+
+# Move multiple files:
+RUN: rm -f %t-multiple.a
+RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt
+RUN: llvm-ar ma %t1.txt %t-multiple.a  %t4.txt %t3.txt
+RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
+
+MULTIPLE:      1.txt
+MULTIPLE-NEXT: 3.txt
+MULTIPLE-NEXT: 4.txt
+MULTIPLE-NEXT: 2.txt
+
+# Move after invalid file:
+RUN: rm -f %t-invalid.a
+RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
+RUN: not llvm-ar ma invalid.txt %t-invalid.a %t2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=ERROR
+RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
+
+ERROR: error: Insertion point not found.
+INVALID:      1.txt
+INVALID-NEXT: 2.txt
+INVALID-NEXT: 3.txt
+
+# Move file to the same position:
+RUN: rm -f %t-position.a
+RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar ma %t1.txt %t-position.a %t2.txt
+RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
+
+POSITION:      1.txt
+POSITION-NEXT: 2.txt
+POSITION-NEXT: 3.txt
diff --git a/llvm/test/tools/llvm-ar/move-before.test b/llvm/test/tools/llvm-ar/move-before.test
new file mode 100644
index 0000000000000..b5fefec29557a
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/move-before.test
@@ -0,0 +1,69 @@
+## Test moving files before a file.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+RUN: touch %t3.txt
+RUN: touch %t4.txt
+
+# Move one file:
+RUN: rm -f %t-one.a
+RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar mb %t2.txt %t-one.a %t3.txt
+RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
+
+ONE:      1.txt
+ONE-NEXT: 3.txt
+ONE-NEXT: 2.txt
+
+# Move file to front:
+RUN: rm -f %t-front.ar
+RUN: llvm-ar rc %t-front.ar %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar mb %t1.txt %t-front.ar %t3.txt
+RUN: llvm-ar t %t-front.ar | FileCheck %s --check-prefix=FRONT
+
+FRONT:      3.txt
+FRONT-NEXT: 1.txt
+FRONT-NEXT: 2.txt
+
+# Move multiple files:
+RUN: rm -f %t-multiple.a
+RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt
+RUN: llvm-ar mb %t2.txt %t-multiple.a %t4.txt %t3.txt
+RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
+
+MULTIPLE:      1.txt
+MULTIPLE-NEXT: 3.txt
+MULTIPLE-NEXT: 4.txt
+MULTIPLE-NEXT: 2.txt
+
+# Move before invalid file:
+RUN: rm -f %t-invalid.a
+RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
+RUN: not llvm-ar mb invalid.txt %t-invalid.a %t2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=ERROR
+RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
+
+ERROR: error: Insertion point not found.
+INVALID:      1.txt
+INVALID-NEXT: 2.txt
+INVALID-NEXT: 3.txt
+
+# Move file to the same position:
+RUN: rm -f %t-position.a
+RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar mb %t3.txt %t-position.a %t2.txt
+RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
+
+POSITION:      1.txt
+POSITION-NEXT: 2.txt
+POSITION-NEXT: 3.txt
+
+# Move file before itself:
+RUN: rm -f %t-same.ar
+RUN: llvm-ar rc %t-same.ar %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar mb %t2.txt %t-same.ar %t2.txt
+RUN: llvm-ar t %t-same.ar | FileCheck %s --check-prefix=SAME
+
+SAME:      1.txt
+SAME-NEXT: 2.txt
+SAME-NEXT: 3.txt
diff --git a/llvm/test/tools/llvm-ar/symtab.test b/llvm/test/tools/llvm-ar/symtab.test
new file mode 100644
index 0000000000000..e59a468d3f2f0
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/symtab.test
@@ -0,0 +1,70 @@
+## Test the s and S modifiers. Build and do not build a symbol table.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: touch %t-other.txt
+
+## Default:
+# RUN: rm -f %t-default.a
+# RUN: llvm-ar rc %t-default.a %t.o
+# RUN: llvm-nm --print-armap %t-default.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+## Use a modifier:
+# RUN: rm -f %t-symtab.a
+# RUN: llvm-ar rcs %t-symtab.a %t.o
+# RUN: llvm-nm --print-armap %t-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+# RUN: rm -f %t-no-symtab.a
+# RUN: llvm-ar rcS %t-no-symtab.a %t.o
+# RUN: llvm-nm --print-armap %t-no-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
+
+## Use both modifiers:
+# RUN: rm -f %t-symtab-last.a
+# RUN: llvm-ar rcSs %t-symtab-last.a %t.o
+# RUN: llvm-nm --print-armap %t-symtab-last.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+# RUN: rm -f %t-no-symtab-last.a
+# RUN: llvm-ar rcsS %t-no-symtab-last.a %t.o
+# RUN: llvm-nm --print-armap %t-no-symtab-last.a \
+# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
+
+## Use an existing archive:
+# RUN: rm -f %t-to-symtab.a
+# RUN: llvm-ar rcS %t-to-symtab.a %t.o
+# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt
+# RUN: llvm-nm --print-armap %t-to-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt
+# RUN: llvm-nm --print-armap %t-to-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+# RUN: rm -f %t-to-no-symtab.a
+# RUN: llvm-ar rcs %t-to-no-symtab.a %t.o
+# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt
+# RUN: llvm-nm --print-armap %t-to-no-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
+
+# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt
+# RUN: llvm-nm --print-armap %t-to-no-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
+
+# SYMTAB:        symbol in
+# NO-SYMTAB-NOT: symbol in
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol
+    Binding: STB_GLOBAL
+    Section: .text

From aae7553b3463558c2f7ad40de1b60b989fea0f05 Mon Sep 17 00:00:00 2001
From: Johan Vikstrom <jvikstrom@google.com>
Date: Tue, 16 Jul 2019 13:23:12 +0000
Subject: [PATCH 234/451] [clangd] Added highlighting for the targets in
 typedefs and using.

Summary:
In `typedef int A` the `A` was not highlighted previously.

This patch gives `A` the same kind of highlighting that the underlying type has (class/enum) (which in this example is no special highlighting because builtins are not handled yet).
Will add highlightings for builtins in another patch.

Reviewers: hokein, sammccall, ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64754

llvm-svn: 366207
---
 .../clangd/SemanticHighlighting.cpp              | 16 +++++++++++++---
 .../unittests/SemanticHighlightingTests.cpp      | 15 ++++++++++++++-
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index d64472d8fdb1d..b6b1c8d8a2cf8 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -93,6 +93,12 @@ class HighlightingTokenCollector
     return true;
   }
 
+  bool VisitTypedefNameDecl(TypedefNameDecl *TD) {
+    if(const auto *TSI = TD->getTypeSourceInfo())
+      addTypeLoc(TD->getLocation(), TSI->getTypeLoc());
+    return true;
+  }
+
   bool VisitTypeLoc(TypeLoc &TL) {
     // This check is for not getting two entries when there are anonymous
     // structs. It also makes us not highlight certain namespace qualifiers
@@ -101,9 +107,7 @@ class HighlightingTokenCollector
     if (TL.getTypeLocClass() == TypeLoc::TypeLocClass::Elaborated)
       return true;
 
-    if (const Type *TP = TL.getTypePtr())
-      if (const TagDecl *TD = TP->getAsTagDecl())
-        addToken(TL.getBeginLoc(), TD);
+    addTypeLoc(TL.getBeginLoc(), TL);
     return true;
   }
 
@@ -118,6 +122,12 @@ class HighlightingTokenCollector
   }
 
 private:
+  void addTypeLoc(SourceLocation Loc, const TypeLoc &TL) {
+    if (const Type *TP = TL.getTypePtr())
+      if (const TagDecl *TD = TP->getAsTagDecl())
+        addToken(Loc, TD);
+  }
+
   void addToken(SourceLocation Loc, const NamedDecl *D) {
     if (D->getDeclName().isIdentifier() && D->getName().empty())
       // Don't add symbols that don't have any length.
diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
index 3a1b1c3e7057f..f69e336253ca7 100644
--- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -90,7 +90,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
         typename T::A* $Field[[D]];
       };
       $Namespace[[abc]]::$Class[[A]]<int> $Variable[[AA]];
-      typedef $Namespace[[abc]]::$Class[[A]]<int> AAA;
+      typedef $Namespace[[abc]]::$Class[[A]]<int> $Class[[AAA]];
       struct $Class[[B]] {
         $Class[[B]]();
         ~$Class[[B]]();
@@ -173,6 +173,19 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
       }
       int $Variable[[B]];
       $Class[[AA]] $Variable[[A]]{$Variable[[B]]};
+    )cpp",
+    R"cpp(
+      namespace $Namespace[[a]] {
+        struct $Class[[A]] {};
+      }
+      typedef $Namespace[[a]]::$Class[[A]] $Class[[B]];
+      using $Class[[BB]] = $Namespace[[a]]::$Class[[A]];
+      enum class $Enum[[E]] {};
+      typedef $Enum[[E]] $Enum[[C]];
+      typedef $Enum[[C]] $Enum[[CC]];
+      using $Enum[[CD]] = $Enum[[CC]];
+      $Enum[[CC]] $Function[[f]]($Class[[B]]);
+      $Enum[[CD]] $Function[[f]]($Class[[BB]]);
     )cpp"};
   for (const auto &TestCase : TestCases) {
     checkHighlightings(TestCase);

From 58864fad39bf291a233bd64309546682be91ce7a Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Tue, 16 Jul 2019 13:45:10 +0000
Subject: [PATCH 235/451] [pstl] Fix compilation with TBB backend

Some types were not using the right namespace qualification.

llvm-svn: 366208
---
 .../include/pstl/internal/parallel_backend_tbb.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pstl/include/pstl/internal/parallel_backend_tbb.h b/pstl/include/pstl/internal/parallel_backend_tbb.h
index a32fd995c750b..8ac385e3992db 100644
--- a/pstl/include/pstl/internal/parallel_backend_tbb.h
+++ b/pstl/include/pstl/internal/parallel_backend_tbb.h
@@ -191,7 +191,7 @@ _Tp
 __parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _Up __u, _Tp __init, _Cp __combine,
                             _Rp __brick_reduce)
 {
-    __par_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce);
+    __tbb_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce);
     // The grain size of 3 is used in order to provide mininum 2 elements for each body
     tbb::this_task_arena::isolate(
         [__first, __last, &__body]() { tbb::parallel_reduce(tbb::blocked_range<_Index>(__first, __last, 3), __body); });
@@ -305,9 +305,9 @@ __upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize
     {
         _Index __k = __split(__m);
         tbb::parallel_invoke(
-            [=] { __par_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); },
+            [=] { __tbb_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); },
             [=] {
-                __par_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine);
+                __tbb_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine);
             });
         if (__m == 2 * __k)
             __r[__m - 1] = __combine(__r[__k - 1], __r[__m - 1]);
@@ -325,11 +325,11 @@ __downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsi
     {
         const _Index __k = __split(__m);
         tbb::parallel_invoke(
-            [=] { __par_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); },
+            [=] { __tbb_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); },
             // Assumes that __combine never throws.
             //TODO: Consider adding a requirement for user functors to be constant.
             [=, &__combine] {
-                __par_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize,
+                __tbb_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize,
                                            __combine(__initial, __r[__k - 1]), __combine, __scan);
             });
     }
@@ -363,7 +363,7 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu
             _Index __m = (__n - 1) / __tilesize;
             __buffer<_Tp> __buf(__m + 1);
             _Tp* __r = __buf.get();
-            __par_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce,
+            __tbb_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce,
                                      __combine);
 
             // When __apex is a no-op and __combine has no side effects, a good optimizer
@@ -375,7 +375,7 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu
             while ((__k &= __k - 1))
                 __t = __combine(__r[__k - 1], __t);
             __apex(__combine(__initial, __t));
-            __par_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial,
+            __tbb_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial,
                                        __combine, __scan);
             return;
         }
@@ -874,7 +874,7 @@ template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, type
 tbb::task*
 __stable_sort_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::execute()
 {
-    typedef __merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __serial_destroy,
+    typedef __merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __utils::__serial_destroy,
                          __utils::__serial_move_merge>
         _MergeTaskType;
 

From 8e482eb5cbb8b6839f9de701a43f1e2257ea3b34 Mon Sep 17 00:00:00 2001
From: Owen Reynolds <gbreynoo@gmail.com>
Date: Tue, 16 Jul 2019 14:25:37 +0000
Subject: [PATCH 236/451] [llvm-ar][test] Add to llvm-ar test coverage

This change adds tests to cover existing llvm-ar functionality.
print.test is omitted due to failing on Darwin.

Differential Revision: https://reviews.llvm.org/D64330

llvm-svn: 366209
---
 llvm/test/tools/llvm-ar/move.test             | 129 ++++++++++++++++++
 llvm/test/tools/llvm-ar/quick-append.test     |  93 +++++++++++++
 .../test/tools/llvm-ar/read-only-archive.test |  30 ++++
 3 files changed, 252 insertions(+)
 create mode 100644 llvm/test/tools/llvm-ar/move.test
 create mode 100644 llvm/test/tools/llvm-ar/quick-append.test
 create mode 100644 llvm/test/tools/llvm-ar/read-only-archive.test

diff --git a/llvm/test/tools/llvm-ar/move.test b/llvm/test/tools/llvm-ar/move.test
new file mode 100644
index 0000000000000..c5028f52e405c
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/move.test
@@ -0,0 +1,129 @@
+## Test the move command without modifiers moves members to the end
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: yaml2obj %s -o %t/1.o --docnum=1
+# RUN: yaml2obj %s -o %t/2.o --docnum=2
+# RUN: yaml2obj %s -o %t/3.o --docnum=3
+
+## Move single member:
+# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar m %t/single.a %t/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}}
+
+# SINGLE:      2.o
+# SINGLE-NEXT: 3.o
+# SINGLE-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+# SINGLE-SYM:      symbol2
+# SINGLE-SYM-NEXT: symbol3
+# SINGLE-SYM-NEXT: symbol1
+
+## Move multiple members:
+# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar m %t/multiple.a %t/1.o %t/2.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}}
+
+# MULTIPLE:      3.o
+# MULTIPLE-NEXT: 1.o
+# MULTIPLE-NEXT: 2.o
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+# MULTIPLE-SYM:      symbol3
+# MULTIPLE-SYM-NEXT: symbol1
+# MULTIPLE-SYM-NEXT: symbol2
+
+## Move same member:
+# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar m %t/same.a %t/1.o %t/1.o
+# RUN: llvm-ar t %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o
+
+# SAME:      2.o
+# SAME-NEXT: 3.o
+# SAME-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+# SAME-SYM:      symbol2
+# SAME-SYM-NEXT: symbol3
+# SAME-SYM-NEXT: symbol1
+
+## Move without member:
+# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar m %t/without.a
+# RUN: llvm-ar t %t/without.a \
+# RUN:   | FileCheck %s --match-full-lines --check-prefix=WITHOUT --implicit-check-not {{.}}
+
+# WITHOUT:      1.o
+# WITHOUT-NEXT: 2.o
+# WITHOUT-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/without.a \
+# RUN:   | FileCheck %s --check-prefix=WITHOUT-SYM
+
+# WITHOUT-SYM:      symbol1
+# WITHOUT-SYM-NEXT: symbol2
+# WITHOUT-SYM-NEXT: symbol3
+
+## No archive:
+# RUN: not llvm-ar m 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
+
+# NO-ARCHIVE: error: An archive name must be specified.
+
+## Member does not exist:
+# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o
+# RUN: not llvm-ar m %t/missing.a %t/missing.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
+
+# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory.
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol3
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/quick-append.test b/llvm/test/tools/llvm-ar/quick-append.test
new file mode 100644
index 0000000000000..ef2e5f74cc808
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/quick-append.test
@@ -0,0 +1,93 @@
+## Test quick append
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: yaml2obj %s -o %t/1.o --docnum=1
+# RUN: yaml2obj %s -o %t/2.o --docnum=2
+
+## Append single member:
+# RUN: llvm-ar qc %t/single.a %t/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}}
+
+# SINGLE: 1.o
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+# SINGLE-SYM: symbol1
+
+## Append multiple members:
+# RUN: llvm-ar qc %t/multiple.a %t/1.o %t/2.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}}
+
+# MULTIPLE:      1.o
+# MULTIPLE-NEXT: 2.o
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+# MULTIPLE-SYM:      symbol1
+# MULTIPLE-SYM-NEXT: symbol2
+
+## Append same member:
+# RUN: llvm-ar qc %t/same.a %t/1.o %t/1.o
+# RUN: llvm-ar t %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --match-full-lines --implicit-check-not {{.}}
+
+# SAME:      1.o
+# SAME-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+# SAME-SYM:      symbol1
+# SAME-SYM-NEXT: symbol1
+
+## Append without member:
+# RUN: llvm-ar qc %t/without.a
+# RUN: llvm-ar t %t/without.a \
+# RUN:   | FileCheck /dev/null --allow-empty --implicit-check-not={{.}}
+
+# RUN: llvm-nm --print-armap %t/without.a \
+# RUN:   | FileCheck /dev/null --allow-empty --implicit-check-not={{.}}
+
+## No archive:
+# RUN: not llvm-ar qc 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
+
+# NO-ARCHIVE: error: An archive name must be specified.
+
+## Member does not exist:
+# RUN: not llvm-ar qc %t/missing.a %t/missing.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
+
+# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory.
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/read-only-archive.test b/llvm/test/tools/llvm-ar/read-only-archive.test
new file mode 100644
index 0000000000000..f4adea20218c0
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/read-only-archive.test
@@ -0,0 +1,30 @@
+## Test that read-only archives cannot be edited
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: touch %t/1.txt
+# RUN: touch %t/2.txt
+# RUN: touch %t/3.txt
+
+# RUN: llvm-ar rc %t/archive.a %t/1.txt %t/2.txt
+
+## Make read only:
+# RUN: chmod 444 %t/archive.a
+
+# RUN: not llvm-ar r %t/archive.a %t/3.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a
+
+# ERROR: error: [[FILE]]: {{[pP]}}ermission denied.
+
+# RUN: not llvm-ar q %t/archive.a %t/3.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a
+
+# RUN: not llvm-ar m %t/archive.a %t/1.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=ERROR-2 -DFILE=%t/archive.a
+
+# ERROR-2: error: [[FILE]]: {{[pP]}}ermission denied.
+
+# RUN: llvm-ar t %t/archive.a \
+# RUN:   | FileCheck %s --check-prefix=ARCHIVE --match-full-lines --implicit-check-not {{.}}
+
+# ARCHIVE: 1.txt
+# ARCHIVE-NEXT: 2.txt

From 22c4a147a96447b38ce90e59b27c33079f1aa203 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 14:28:30 +0000
Subject: [PATCH 237/451] AMDGPU/GlobalISel: Fix test failures in release build

Apparently the check for legal instructions during instruction
select does not happen without an asserts build, so these would
successfully select in release, and fail in debug.

Make s16 and/or/xor legal. These can just be selected directly
to the 32-bit operation, as is already done in SelectionDAG, so just
make them legal.

llvm-svn: 366210
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |   7 +-
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |   2 +-
 .../AMDGPU/GlobalISel/inst-select-and.mir     |  21 +-
 .../AMDGPU/GlobalISel/inst-select-or.mir      |  21 +-
 .../AMDGPU/GlobalISel/inst-select-xor.mir     |  21 +-
 .../AMDGPU/GlobalISel/legalize-and.mir        |  10 +-
 .../AMDGPU/GlobalISel/legalize-ashr.mir       |  52 ++--
 .../AMDGPU/GlobalISel/legalize-fcopysign.mir  | 246 +++++++--------
 .../AMDGPU/GlobalISel/legalize-lshr.mir       |  48 ++-
 .../GlobalISel/legalize-merge-values.mir      | 286 +++++++++---------
 .../CodeGen/AMDGPU/GlobalISel/legalize-or.mir |  20 +-
 .../AMDGPU/GlobalISel/legalize-shl.mir        |  62 ++--
 .../AMDGPU/GlobalISel/legalize-umax.mir       |  28 +-
 .../AMDGPU/GlobalISel/legalize-umin.mir       |  28 +-
 .../AMDGPU/GlobalISel/legalize-xor.mir        |  20 +-
 15 files changed, 406 insertions(+), 466 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index fd5ee293d6cfb..aa634e881d870 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -291,10 +291,13 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
   // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
   // the result?
   if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
-    const TargetRegisterClass *RC
-      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
     unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
     I.setDesc(TII.get(InstOpc));
+
+    const TargetRegisterClass *RC
+      = TRI.getConstrainedRegClassForOperand(Dst, MRI);
+    if (!RC)
+      return false;
     return RBI.constrainGenericRegister(DstReg, *RC, MRI) &&
            RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) &&
            RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 3d1f7f404c918..3cf4fbc752493 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -213,7 +213,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   // Report legal for any types we can handle anywhere. For the cases only legal
   // on the SALU, RegBankSelect will be able to re-legalize.
   getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
-    .legalFor({S32, S1, S64, V2S32, V2S16, V4S16})
+    .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
     .clampScalar(0, S32, S64)
     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
     .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
index f00452b5564ec..d3b877d72e28c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir
@@ -117,20 +117,17 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; WAVE64-LABEL: name: and_s16_sgpr_sgpr_sgpr
     ; WAVE64: liveins: $sgpr0, $sgpr1
-    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
-    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
-    ; WAVE64: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]]
-    ; WAVE64: S_ENDPGM 0, implicit [[AND]](s16)
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B32_]]
     ; WAVE32-LABEL: name: and_s16_sgpr_sgpr_sgpr
     ; WAVE32: liveins: $sgpr0, $sgpr1
-    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
-    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
-    ; WAVE32: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]]
-    ; WAVE32: S_ENDPGM 0, implicit [[AND]](s16)
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s32) = COPY $sgpr1
     %2:sgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
index 4e120e7c7dd19..d102761158159 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
@@ -117,20 +117,17 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; WAVE64-LABEL: name: or_s16_sgpr_sgpr_sgpr
     ; WAVE64: liveins: $sgpr0, $sgpr1
-    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
-    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
-    ; WAVE64: [[OR:%[0-9]+]]:sgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]]
-    ; WAVE64: S_ENDPGM 0, implicit [[OR]](s16)
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B32_]]
     ; WAVE32-LABEL: name: or_s16_sgpr_sgpr_sgpr
     ; WAVE32: liveins: $sgpr0, $sgpr1
-    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
-    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
-    ; WAVE32: [[OR:%[0-9]+]]:sgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]]
-    ; WAVE32: S_ENDPGM 0, implicit [[OR]](s16)
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s32) = COPY $sgpr1
     %2:sgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
index 3035e022646f0..74555ab9940ea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir
@@ -117,20 +117,17 @@ body: |
     liveins: $sgpr0, $sgpr1
     ; WAVE64-LABEL: name: xor_s16_sgpr_sgpr_sgpr
     ; WAVE64: liveins: $sgpr0, $sgpr1
-    ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
-    ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
-    ; WAVE64: [[XOR:%[0-9]+]]:sgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
-    ; WAVE64: S_ENDPGM 0, implicit [[XOR]](s16)
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE64: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]]
+    ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B32_]]
     ; WAVE32-LABEL: name: xor_s16_sgpr_sgpr_sgpr
     ; WAVE32: liveins: $sgpr0, $sgpr1
-    ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
-    ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
-    ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
-    ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
-    ; WAVE32: [[XOR:%[0-9]+]]:sgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
-    ; WAVE32: S_ENDPGM 0, implicit [[XOR]](s16)
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1
+    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]]
+    ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s32) = COPY $sgpr1
     %2:sgpr(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
index e95e063212dd0..7b578420a6ea2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir
@@ -156,11 +156,11 @@ body: |
     ; CHECK-LABEL: name: test_and_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
index 21f4ffa4a0ba9..6b5451935f40c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -223,11 +223,10 @@ body: |
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]]
+    ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16)
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_ashr_s16_i8
@@ -235,11 +234,10 @@ body: |
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]]
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
@@ -274,32 +272,30 @@ body: |
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC1]](s16)
-    ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16)
-    ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[TRUNC]](s16)
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC2]](s16)
+    ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16)
+    ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16)
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_ashr_i8_i8
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC1]](s16)
-    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[TRUNC]](s16)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC2]](s16)
+    ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
index 5d590ff17b4dc..d676f6711c620 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
@@ -12,48 +12,42 @@ body: |
     ; SI-LABEL: name: test_copysign_s16_s16
     ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY4]]
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[COPY7]](s32)
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
+    ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_copysign_s16_s16
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY4]]
-    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
-    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[COPY7]](s32)
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_copysign_s16_s16
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY4]]
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[COPY7]](s32)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -252,57 +246,51 @@ body: |
     ; SI-LABEL: name: test_copysign_s16_s32
     ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]]
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[COPY8]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]]
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_copysign_s16_s32
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]]
-    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[COPY8]](s32)
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]]
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_copysign_s16_s32
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]]
-    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[COPY8]](s32)
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]]
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -433,60 +421,54 @@ body: |
     ; SI-LABEL: name: test_copysign_s16_s64
     ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[COPY4]]
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; SI: $vgpr0 = COPY [[COPY7]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
+    ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC3]](s32)
+    ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
+    ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[TRUNC1]]
+    ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_copysign_s16_s64
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[COPY4]]
-    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
-    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; VI: $vgpr0 = COPY [[COPY7]](s32)
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
+    ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC3]](s32)
+    ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[TRUNC1]]
+    ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_copysign_s16_s64
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[COPY4]]
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]]
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; GFX9: $vgpr0 = COPY [[COPY7]](s32)
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC3]](s32)
+    ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[TRUNC1]]
+    ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s64) = COPY $vgpr1_vgpr2
     %2:_(s16) = G_TRUNC %0
@@ -906,57 +888,51 @@ body: |
     ; SI-LABEL: name: test_copysign_s16_s32_flags
     ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; SI: %14:_(s32) = nnan G_OR [[COPY6]], [[COPY7]]
-    ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY %14(s32)
-    ; SI: $vgpr0 = COPY [[COPY8]](s32)
+    ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]]
+    ; SI: %3:_(s16) = nnan G_OR [[AND]], [[AND1]]
+    ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16)
+    ; SI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; VI-LABEL: name: test_copysign_s16_s32_flags
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; VI: %14:_(s32) = nnan G_OR [[COPY6]], [[COPY7]]
-    ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY %14(s32)
-    ; VI: $vgpr0 = COPY [[COPY8]](s32)
+    ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]]
+    ; VI: %3:_(s16) = nnan G_OR [[AND]], [[AND1]]
+    ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16)
+    ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_copysign_s16_s32_flags
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]]
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32)
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; GFX9: %14:_(s32) = nnan G_OR [[COPY6]], [[COPY7]]
-    ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY %14(s32)
-    ; GFX9: $vgpr0 = COPY [[COPY8]](s32)
+    ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]]
+    ; GFX9: %3:_(s16) = nnan G_OR [[AND]], [[AND1]]
+    ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16)
+    ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
index 82d1c5a896b9f..48a3e4c288b33 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir
@@ -219,11 +219,10 @@ body: |
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]]
+    ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16)
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_lshr_s16_i8
@@ -231,11 +230,10 @@ body: |
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]]
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
@@ -268,30 +266,24 @@ body: |
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
-    ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC]](s16)
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]]
+    ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND1]], [[AND]](s16)
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_lshr_i8_i8
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC]](s16)
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]]
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND1]], [[AND]](s16)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
index fc2ba600340a4..7438180111db7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
@@ -15,53 +15,51 @@ body: |
     ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
     ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
     ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
-    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]]
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32)
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]]
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY11]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32)
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]]
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[COPY15]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]]
     ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]]
-    ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32)
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C2]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[AND6]](s32)
-    ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32)
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C2]]
-    ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[COPY19]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]]
+    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]]
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[TRUNC]]
     ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C1]]
-    ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32)
-    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C2]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32)
-    ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32)
-    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C2]]
-    ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[COPY23]]
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16)
+    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]]
+    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32)
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32)
+    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC6]]
+    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32)
+    ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[TRUNC]]
+    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]]
+    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32)
+    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]]
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32)
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND9]], [[TRUNC8]]
+    ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16)
     ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](p1)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
@@ -91,20 +89,22 @@ body: |
     ; CHECK-LABEL: name: test_merge_s16_s8_s8
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]]
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C4]]
+    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s16) = G_MERGE_VALUES %0, %1
@@ -160,31 +160,31 @@ body: |
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
     ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C5]]
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C6]]
+    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
-    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]]
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]]
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[COPY7]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]]
+    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16)
     ; CHECK: $vgpr0 = COPY [[MV]](s32)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
@@ -424,75 +424,71 @@ body: |
     ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
     ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
     ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
-    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C12]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C13]]
-    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C12]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C13]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C14]]
+    ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C14]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C14]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32)
-    ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C15]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C13]]
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C14]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C14]]
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[COPY7]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C15]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]]
     ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C16]](s32)
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C13]]
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C14]]
-    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[AND6]](s32)
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C14]]
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[COPY11]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C16]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C14]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C15]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]]
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[TRUNC]]
     ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C17]](s32)
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C13]]
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
-    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C14]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32)
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C14]]
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[COPY15]]
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C17]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C14]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C15]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32)
+    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC6]]
+    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[C6]](s32)
+    ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[TRUNC]]
     ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C18]](s32)
-    ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C13]]
-    ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C9]](s32)
-    ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C14]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[AND12]](s32)
-    ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
-    ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C14]]
-    ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[COPY19]]
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[OR4]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C18]](s32)
+    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C14]]
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
+    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C15]]
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32)
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND9]], [[TRUNC8]]
+    ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[C8]](s32)
+    ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[TRUNC]]
     ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C19]](s32)
-    ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C13]]
-    ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
-    ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C14]]
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[AND15]](s32)
-    ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
-    ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C14]]
-    ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[COPY23]]
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[OR5]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16)
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C19]](s32)
+    ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C14]]
+    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C9]](s32)
+    ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C15]]
+    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[AND13]](s32)
+    ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC10]]
+    ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[C10]](s32)
+    ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[TRUNC]]
+    ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C20]](s32)
+    ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C14]]
+    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
+    ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C15]]
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[AND16]](s32)
+    ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND15]], [[TRUNC12]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16)
     ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
index 88a15298c36f3..054686050a9a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir
@@ -156,11 +156,11 @@ body: |
     ; CHECK-LABEL: name: test_or_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -179,11 +179,11 @@ body: |
     ; CHECK-LABEL: name: test_or_s24
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
index d767bfb2d7501..d661a25a76536 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir
@@ -214,11 +214,10 @@ body: |
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]]
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16)
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_shl_s16_i8
@@ -226,11 +225,10 @@ body: |
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]]
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
@@ -262,24 +260,22 @@ body: |
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND]](s16)
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_shl_i8_i8
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND]](s16)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
@@ -715,24 +711,22 @@ body: |
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND]](s16)
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_shl_s7_s7
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
-    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16)
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND]](s16)
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
index c533e00ad1a0d..cf17f35ebc735 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir
@@ -93,28 +93,24 @@ body: |
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
-    ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]]
+    ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]]
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_umax_s8
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
-    ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]]
+    ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]]
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
index c33c99dc43c31..bbce1a7af3da2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir
@@ -93,28 +93,24 @@ body: |
     ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]]
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
-    ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]]
+    ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]]
     ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
     ; VI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; GFX9-LABEL: name: test_umin_s8
     ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]]
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
-    ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]]
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]]
+    ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]]
     ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16)
     ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
index fae9ae8c9f56e..29e37ec43eb55 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir
@@ -156,11 +156,11 @@ body: |
     ; CHECK-LABEL: name: test_xor_s16
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[XOR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0
@@ -179,11 +179,11 @@ body: |
     ; CHECK-LABEL: name: test_xor_s24
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[XOR]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s16) = G_TRUNC %0

From 655cb4a2d702b70899f3ad384d047537e02698d8 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Tue, 16 Jul 2019 14:51:46 +0000
Subject: [PATCH 238/451] [OPENMP]Add support for analysis of if clauses.

Summary:
Added support for analysis of if clauses in the OpenMP directives to be
able to check for the use of uninitialized variables.

Reviewers: NoQ

Subscribers: guansong, jfb, jdoerfert, caomhin, kkwli0, cfe-commits

Tags: clang

Differential Revision: https://reviews.llvm.org/D64646

llvm-svn: 366211
---
 clang/include/clang/AST/OpenMPClause.h        |   7 +-
 clang/lib/AST/OpenMPClause.cpp                |  19 +
 clang/lib/Analysis/CFG.cpp                    |   5 +-
 clang/test/Analysis/cfg-openmp.cpp            | 532 ++++++++++--------
 clang/test/OpenMP/cancel_if_messages.cpp      |  10 +
 .../distribute_parallel_for_if_messages.cpp   |   7 +
 ...stribute_parallel_for_simd_if_messages.cpp |   7 +
 .../test/OpenMP/parallel_for_if_messages.cpp  |   7 +
 .../OpenMP/parallel_for_simd_if_messages.cpp  |   7 +
 clang/test/OpenMP/parallel_if_messages.cpp    |   7 +
 .../OpenMP/parallel_sections_if_messages.cpp  |   8 +
 clang/test/OpenMP/target_data_if_messages.cpp |   7 +
 .../OpenMP/target_enter_data_if_messages.cpp  |   7 +
 .../OpenMP/target_exit_data_if_messages.cpp   |   7 +
 clang/test/OpenMP/target_if_messages.cpp      |   7 +
 .../target_parallel_for_if_messages.cpp       |   7 +
 .../target_parallel_for_simd_if_messages.cpp  |   7 +
 .../OpenMP/target_parallel_if_messages.cpp    |   7 +
 clang/test/OpenMP/target_simd_if_messages.cpp |   7 +
 .../target_teams_distribute_if_messages.cpp   |   7 +
 ...ms_distribute_parallel_for_if_messages.cpp |   7 +
 ...stribute_parallel_for_simd_if_messages.cpp |   8 +
 ...rget_teams_distribute_simd_if_messages.cpp |   7 +
 .../test/OpenMP/target_teams_if_messages.cpp  |   7 +
 .../test/OpenMP/target_update_if_messages.cpp |   7 +
 clang/test/OpenMP/task_if_messages.cpp        |   7 +
 ...ms_distribute_parallel_for_if_messages.cpp |   8 +
 ...stribute_parallel_for_simd_if_messages.cpp |   8 +
 28 files changed, 497 insertions(+), 241 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index c6daf73a623bd..eadcc62a34575 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -501,11 +501,10 @@ class OMPIfClause : public OMPClause, public OMPClauseWithPreInit {
     return const_child_range(&Condition, &Condition + 1);
   }
 
-  child_range used_children() {
-    return child_range(child_iterator(), child_iterator());
-  }
+  child_range used_children();
   const_child_range used_children() const {
-    return const_child_range(const_child_iterator(), const_child_iterator());
+    auto Children = const_cast<OMPIfClause *>(this)->used_children();
+    return const_child_range(Children.begin(), Children.end());
   }
 
   static bool classof(const OMPClause *T) {
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 41520b380276c..9d8a7ebc3023e 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -209,6 +209,25 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C)
   return nullptr;
 }
 
+/// Gets the address of the original, non-captured, expression used in the
+/// clause as the preinitializer.
+static Stmt **getAddrOfExprAsWritten(Stmt *S) {
+  if (!S)
+    return nullptr;
+  if (auto *DS = dyn_cast<DeclStmt>(S)) {
+    assert(DS->isSingleDecl() && "Only single expression must be captured.");
+    if (auto *OED = dyn_cast<OMPCapturedExprDecl>(DS->getSingleDecl()))
+      return OED->getInitAddress();
+  }
+  return nullptr;
+}
+
+OMPClause::child_range OMPIfClause::used_children() {
+  if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt()))
+    return child_range(C, C + 1);
+  return child_range(&Condition, &Condition + 1);
+}
+
 OMPOrderedClause *OMPOrderedClause::Create(const ASTContext &C, Expr *Num,
                                            unsigned NumLoops,
                                            SourceLocation StartLoc,
diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp
index de89105a29257..0ed1e988a196a 100644
--- a/clang/lib/Analysis/CFG.cpp
+++ b/clang/lib/Analysis/CFG.cpp
@@ -4746,8 +4746,9 @@ CFGBlock *CFGBuilder::VisitOMPExecutableDirective(OMPExecutableDirective *D,
 
   // Reverse the elements to process them in natural order. Iterators are not
   // bidirectional, so we need to create temp vector.
-  for (Stmt *S : llvm::reverse(llvm::to_vector<8>(
-           OMPExecutableDirective::used_clauses_children(D->clauses())))) {
+  SmallVector<Stmt *, 8> Used(
+      OMPExecutableDirective::used_clauses_children(D->clauses()));
+  for (Stmt *S : llvm::reverse(Used)) {
     assert(S && "Expected non-null used-in-clause child.");
     if (CFGBlock *R = Visit(S))
       B = R;
diff --git a/clang/test/Analysis/cfg-openmp.cpp b/clang/test/Analysis/cfg-openmp.cpp
index 2f734d14b0216..dd417bf408c8f 100644
--- a/clang/test/Analysis/cfg-openmp.cpp
+++ b/clang/test/Analysis/cfg-openmp.cpp
@@ -1,340 +1,402 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp | FileCheck %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp -fopenmp-version=45 | FileCheck %s
 
 // CHECK-LABEL:  void xxx(int argc)
 void xxx(int argc) {
 // CHECK:        [B1]
 // CHECK-NEXT:   1: int x;
-  int x;
-// CHECK-NEXT:   2: x
-// CHECK-NEXT:   3: [B1.2] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:   4: argc
-// CHECK-NEXT:   5: [B1.4] = [B1.3]
-// CHECK-NEXT:   6: #pragma omp atomic read
-// CHECK-NEXT:    [B1.5];
+// CHECK-NEXT:   2: int cond;
+  int x, cond;
+// CHECK-NEXT:   [[#ATOM:]]: x
+// CHECK-NEXT:   [[#ATOM+1]]: [B1.[[#ATOM]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:   [[#ATOM+2]]: argc
+// CHECK-NEXT:   [[#ATOM+3]]: [B1.[[#ATOM+2]]] = [B1.[[#ATOM+1]]]
+// CHECK-NEXT:   [[#ATOM+4]]: #pragma omp atomic read
+// CHECK-NEXT:   [B1.[[#ATOM+3]]];
 #pragma omp atomic read
   argc = x;
-// CHECK-NEXT:   7: x
-// CHECK-NEXT:   8: [B1.7] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:   9: argc
-// CHECK-NEXT:  10: [B1.9] = [B1.8]
-// CHECK-NEXT:  11: #pragma omp critical
-// CHECK-NEXT:    [B1.10];
+// CHECK-NEXT:   [[#CRIT:]]: x
+// CHECK-NEXT:   [[#CRIT+1]]: [B1.[[#CRIT]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:   [[#CRIT+2]]: argc
+// CHECK-NEXT:   [[#CRIT+3]]: [B1.[[#CRIT+2]]] = [B1.[[#CRIT+1]]]
+// CHECK-NEXT:   [[#CRIT+4]]: #pragma omp critical
+// CHECK-NEXT:   [B1.[[#CRIT+3]]];
 #pragma omp critical
   argc = x;
-// CHECK-NEXT:  12: x
-// CHECK-NEXT:  13: [B1.12] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  14: argc
-// CHECK-NEXT:  15: [B1.14] = [B1.13]
-// CHECK-NEXT:  16: #pragma omp distribute parallel for
+// CHECK-NEXT:  [[#DPF:]]: x
+// CHECK-NEXT:  [[#DPF+1]]: [B1.[[#DPF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DPF+2]]: argc
+// CHECK-NEXT:  [[#DPF+3]]: [B1.[[#DPF+2]]] = [B1.[[#DPF+1]]]
+// CHECK-NEXT:  [[#DPF+4]]: cond
+// CHECK-NEXT:  [[#DPF+5]]: [B1.[[#DPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DPF+6]]: [B1.[[#DPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#DPF+7]]: #pragma omp distribute parallel for if(parallel: cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.15];
-#pragma omp distribute parallel for
+// CHECK-NEXT:        [B1.[[#DPF+3]]];
+#pragma omp distribute parallel for if(parallel:cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  17: x
-// CHECK-NEXT:  18: [B1.17] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  19: argc
-// CHECK-NEXT:  20: [B1.19] = [B1.18]
-// CHECK-NEXT:  21: #pragma omp distribute parallel for simd
+// CHECK-NEXT:  [[#DPFS:]]: x
+// CHECK-NEXT:  [[#DPFS+1]]: [B1.[[#DPFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DPFS+2]]: argc
+// CHECK-NEXT:  [[#DPFS+3]]: [B1.[[#DPFS+2]]] = [B1.[[#DPFS+1]]]
+// CHECK-NEXT:  [[#DPFS+4]]: cond
+// CHECK-NEXT:  [[#DPFS+5]]: [B1.[[#DPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DPFS+6]]: [B1.[[#DPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#DPFS+7]]: #pragma omp distribute parallel for simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.20];
-#pragma omp distribute parallel for simd
+// CHECK-NEXT:        [B1.[[#DPFS+3]]];
+#pragma omp distribute parallel for simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  22: x
-// CHECK-NEXT:  23: [B1.22] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  24: argc
-// CHECK-NEXT:  25: [B1.24] = [B1.23]
-// CHECK-NEXT:  26: #pragma omp distribute simd
+// CHECK-NEXT:  [[#DS:]]: x
+// CHECK-NEXT:  [[#DS+1]]: [B1.[[#DS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#DS+2]]: argc
+// CHECK-NEXT:  [[#DS+3]]: [B1.[[#DS+2]]] = [B1.[[#DS+1]]]
+// CHECK-NEXT:  [[#DS+4]]: #pragma omp distribute simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.25];
+// CHECK-NEXT:        [B1.[[#DS+3]]];
 #pragma omp distribute simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  27: x
-// CHECK-NEXT:  28: [B1.27] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  29: argc
-// CHECK-NEXT:  30: [B1.29] = [B1.28]
-// CHECK-NEXT:  31: #pragma omp for
+// CHECK-NEXT:  [[#FOR:]]: x
+// CHECK-NEXT:  [[#FOR+1]]: [B1.[[#FOR]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#FOR+2]]: argc
+// CHECK-NEXT:  [[#FOR+3]]: [B1.[[#FOR+2]]] = [B1.[[#FOR+1]]]
+// CHECK-NEXT:  [[#FOR+4]]: #pragma omp for
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.30];
+// CHECK-NEXT:        [B1.[[#FOR+3]]];
 #pragma omp for
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  32: x
-// CHECK-NEXT:  33: [B1.32] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  34: argc
-// CHECK-NEXT:  35: [B1.34] = [B1.33]
-// CHECK-NEXT:  36: #pragma omp for simd
+// CHECK-NEXT:  [[#FS:]]: x
+// CHECK-NEXT:  [[#FS+1]]: [B1.[[#FS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#FS+2]]: argc
+// CHECK-NEXT:  [[#FS+3]]: [B1.[[#FS+2]]] = [B1.[[#FS+1]]]
+// CHECK-NEXT:  [[#FS+4]]: #pragma omp for simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.35];
+// CHECK-NEXT:        [B1.[[#FS+3]]];
 #pragma omp for simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  37: x
-// CHECK-NEXT:  38: [B1.37] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  39: argc
-// CHECK-NEXT:  40: [B1.39] = [B1.38]
-// CHECK-NEXT:  41: #pragma omp master
-// CHECK-NEXT:    [B1.40];
+// CHECK-NEXT:  [[#MASTER:]]: x
+// CHECK-NEXT:  [[#MASTER+1]]: [B1.[[#MASTER]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#MASTER+2]]: argc
+// CHECK-NEXT:  [[#MASTER+3]]: [B1.[[#MASTER+2]]] = [B1.[[#MASTER+1]]]
+// CHECK-NEXT:  [[#MASTER+4]]: #pragma omp master
+// CHECK-NEXT:    [B1.[[#MASTER+3]]];
 #pragma omp master
   argc = x;
-// CHECK-NEXT:  42: x
-// CHECK-NEXT:  43: [B1.42] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  44: argc
-// CHECK-NEXT:  45: [B1.44] = [B1.43]
-// CHECK-NEXT:  46: #pragma omp ordered
-// CHECK-NEXT:    [B1.45];
-// CHECK-NEXT:  47: #pragma omp for ordered
+// CHECK-NEXT:  [[#ORD:]]: x
+// CHECK-NEXT:  [[#ORD+1]]: [B1.[[#ORD]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#ORD+2]]: argc
+// CHECK-NEXT:  [[#ORD+3]]: [B1.[[#ORD+2]]] = [B1.[[#ORD+1]]]
+// CHECK-NEXT:  [[#ORD+4]]: #pragma omp ordered
+// CHECK-NEXT:    [B1.[[#ORD+3]]];
+// CHECK-NEXT:  [[#ORD+5]]: #pragma omp for ordered
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i) {
-// CHECK-NEXT:[B1.46]    }
+// CHECK-NEXT:[B1.[[#ORD+4]]]    }
 #pragma omp for ordered
   for (int i = 0; i < 10; ++i) {
 #pragma omp ordered
     argc = x;
   }
-// CHECK-NEXT:  48: x
-// CHECK-NEXT:  49: [B1.48] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  50: argc
-// CHECK-NEXT:  51: [B1.50] = [B1.49]
-// CHECK-NEXT:  52: #pragma omp parallel for
+// CHECK-NEXT:  [[#PF:]]: x
+// CHECK-NEXT:  [[#PF+1]]: [B1.[[#PF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PF+2]]: argc
+// CHECK-NEXT:  [[#PF+3]]: [B1.[[#PF+2]]] = [B1.[[#PF+1]]]
+// CHECK-NEXT:  [[#PF+4]]: cond
+// CHECK-NEXT:  [[#PF+5]]: [B1.[[#PF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PF+6]]: [B1.[[#PF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#PF+7]]: #pragma omp parallel for if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.51];
-#pragma omp parallel for
+// CHECK-NEXT:        [B1.[[#PF+3]]];
+#pragma omp parallel for if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  53: x
-// CHECK-NEXT:  54: [B1.53] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  55: argc
-// CHECK-NEXT:  56: [B1.55] = [B1.54]
-// CHECK-NEXT:  57: #pragma omp parallel for simd
+// CHECK-NEXT:  [[#PFS:]]: x
+// CHECK-NEXT:  [[#PFS+1]]: [B1.[[#PFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PFS+2]]: argc
+// CHECK-NEXT:  [[#PFS+3]]: [B1.[[#PFS+2]]] = [B1.[[#PFS+1]]]
+// CHECK-NEXT:  [[#PFS+4]]: cond
+// CHECK-NEXT:  [[#PFS+5]]: [B1.[[#PFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PFS+6]]: [B1.[[#PFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#PFS+7]]: #pragma omp parallel for simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.56];
-#pragma omp parallel for simd
+// CHECK-NEXT:        [B1.[[#PFS+3]]];
+#pragma omp parallel for simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  58: x
-// CHECK-NEXT:  59: [B1.58] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  60: argc
-// CHECK-NEXT:  61: [B1.60] = [B1.59]
-// CHECK-NEXT:  62: #pragma omp parallel
-// CHECK-NEXT:    [B1.61];
-#pragma omp parallel
+// CHECK-NEXT:  [[#PAR:]]: x
+// CHECK-NEXT:  [[#PAR+1]]: [B1.[[#PAR]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PAR+2]]: argc
+// CHECK-NEXT:  [[#PAR+3]]: [B1.[[#PAR+2]]] = [B1.[[#PAR+1]]]
+// CHECK-NEXT:  [[#PAR+4]]: cond
+// CHECK-NEXT:  [[#PAR+5]]: [B1.[[#PAR+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PAR+6]]: [B1.[[#PAR+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#PAR+7]]: #pragma omp parallel if(cond)
+// CHECK-NEXT:    [B1.[[#PAR+3]]];
+#pragma omp parallel if(cond)
   argc = x;
-// CHECK-NEXT:  63: x
-// CHECK-NEXT:  64: [B1.63] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  65: argc
-// CHECK-NEXT:  66: [B1.65] = [B1.64]
-// CHECK-NEXT:  67: #pragma omp parallel sections
+// CHECK-NEXT:  [[#PSECT:]]: x
+// CHECK-NEXT:  [[#PSECT+1]]: [B1.[[#PSECT]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PSECT+2]]: argc
+// CHECK-NEXT:  [[#PSECT+3]]: [B1.[[#PSECT+2]]] = [B1.[[#PSECT+1]]]
+// CHECK-NEXT:  [[#PSECT+4]]: cond
+// CHECK-NEXT:  [[#PSECT+5]]: [B1.[[#PSECT+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#PSECT+6]]: [B1.[[#PSECT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#PSECT+7]]: #pragma omp parallel sections if(cond)
 // CHECK-NEXT:    {
-// CHECK-NEXT:        [B1.66];
+// CHECK-NEXT:        [B1.[[#PSECT+3]]];
 // CHECK-NEXT:    }
-#pragma omp parallel sections
+#pragma omp parallel sections if(cond)
   {
     argc = x;
   }
-// CHECK-NEXT:  68: x
-// CHECK-NEXT:  69: [B1.68] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  70: argc
-// CHECK-NEXT:  71: [B1.70] = [B1.69]
-// CHECK-NEXT:  72: #pragma omp simd
+// CHECK-NEXT:  [[#SIMD:]]: x
+// CHECK-NEXT:  [[#SIMD+1]]: [B1.[[#SIMD]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#SIMD+2]]: argc
+// CHECK-NEXT:  [[#SIMD+3]]: [B1.[[#SIMD+2]]] = [B1.[[#SIMD+1]]]
+// CHECK-NEXT:  [[#SIMD+4]]: #pragma omp simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.71];
+// CHECK-NEXT:        [B1.[[#SIMD+3]]];
 #pragma omp simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  73: x
-// CHECK-NEXT:  74: [B1.73] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  75: argc
-// CHECK-NEXT:  76: [B1.75] = [B1.74]
-// CHECK-NEXT:  77: #pragma omp single
-// CHECK-NEXT:    [B1.76];
+// CHECK-NEXT:  [[#SINGLE:]]: x
+// CHECK-NEXT:  [[#SINGLE+1]]: [B1.[[#SINGLE]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#SINGLE+2]]: argc
+// CHECK-NEXT:  [[#SINGLE+3]]: [B1.[[#SINGLE+2]]] = [B1.[[#SINGLE+1]]]
+// CHECK-NEXT:  [[#SINGLE+4]]: #pragma omp single
+// CHECK-NEXT:    [B1.[[#SINGLE+3]]];
 #pragma omp single
   argc = x;
-// CHECK-NEXT:  78: x
-// CHECK-NEXT:  79: [B1.78] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  80: argc
-// CHECK-NEXT:  81: [B1.80] = [B1.79]
-// CHECK-NEXT:  82: #pragma omp target depend(in : argc)
-// CHECK-NEXT:    [B1.81];
+// CHECK-NEXT:  [[#TARGET:]]: x
+// CHECK-NEXT:  [[#TARGET+1]]: [B1.[[#TARGET]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TARGET+2]]: argc
+// CHECK-NEXT:  [[#TARGET+3]]: [B1.[[#TARGET+2]]] = [B1.[[#TARGET+1]]]
+// CHECK-NEXT:  [[#TARGET+4]]: cond
+// CHECK-NEXT:  [[#TARGET+5]]: [B1.[[#TARGET+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TARGET+6]]: [B1.[[#TARGET+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TARGET+7]]: #pragma omp target depend(in : argc) if(cond)
+// CHECK-NEXT:    [B1.[[#TARGET+3]]];
 #pragma omp target depend(in \
-                          : argc)
+                          : argc) if(cond)
   argc = x;
-// CHECK-NEXT:  83: x
-// CHECK-NEXT:  84: [B1.83] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  85: argc
-// CHECK-NEXT:  86: [B1.85] = [B1.84]
-// CHECK-NEXT:  87: #pragma omp target parallel for
+// CHECK-NEXT:  [[#TPF:]]: x
+// CHECK-NEXT:  [[#TPF+1]]: [B1.[[#TPF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TPF+2]]: argc
+// CHECK-NEXT:  [[#TPF+3]]: [B1.[[#TPF+2]]] = [B1.[[#TPF+1]]]
+// CHECK-NEXT:  [[#TPF+4]]: cond
+// CHECK-NEXT:  [[#TPF+5]]: [B1.[[#TPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TPF+6]]: [B1.[[#TPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TPF+7]]: #pragma omp target parallel for if(parallel: cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.86];
-#pragma omp target parallel for
+// CHECK-NEXT:        [B1.[[#TPF+3]]];
+#pragma omp target parallel for if(parallel:cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  88: x
-// CHECK-NEXT:  89: [B1.88] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  90: argc
-// CHECK-NEXT:  91: [B1.90] = [B1.89]
-// CHECK-NEXT:  92: #pragma omp target parallel for simd
+// CHECK-NEXT:  [[#TPFS:]]: x
+// CHECK-NEXT:  [[#TPFS+1]]: [B1.[[#TPFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TPFS+2]]: argc
+// CHECK-NEXT:  [[#TPFS+3]]: [B1.[[#TPFS+2]]] = [B1.[[#TPFS+1]]]
+// CHECK-NEXT:  [[#TPFS+4]]: cond
+// CHECK-NEXT:  [[#TPFS+5]]: [B1.[[#TPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TPFS+6]]: [B1.[[#TPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TPFS+7]]: #pragma omp target parallel for simd if(target: cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.91];
-#pragma omp target parallel for simd
+// CHECK-NEXT:        [B1.[[#TPFS+3]]];
+#pragma omp target parallel for simd if(target:cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:  93: x
-// CHECK-NEXT:  94: [B1.93] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT:  95: argc
-// CHECK-NEXT:  96: [B1.95] = [B1.94]
-// CHECK-NEXT:  97: #pragma omp target parallel
-// CHECK-NEXT:    [B1.96];
-#pragma omp target parallel
+// CHECK-NEXT:  [[#TP:]]: x
+// CHECK-NEXT:  [[#TP+1]]: [B1.[[#TP]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TP+2]]: argc
+// CHECK-NEXT:  [[#TP+3]]: [B1.[[#TP+2]]] = [B1.[[#TP+1]]]
+// CHECK-NEXT:  [[#TP+4]]: cond
+// CHECK-NEXT:  [[#TP+5]]: [B1.[[#TP+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TP+6]]: [B1.[[#TP+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TP+7]]: #pragma omp target parallel if(cond)
+// CHECK-NEXT:    [B1.[[#TP+3]]];
+#pragma omp target parallel if(cond)
   argc = x;
-// CHECK-NEXT:  98: x
-// CHECK-NEXT:  99: [B1.98] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 100: argc
-// CHECK-NEXT: 101: [B1.100] = [B1.99]
-// CHECK-NEXT: 102: #pragma omp target simd
+// CHECK-NEXT:  [[#TSIMD:]]: x
+// CHECK-NEXT:  [[#TSIMD+1]]: [B1.[[#TSIMD]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TSIMD+2]]: argc
+// CHECK-NEXT:  [[#TSIMD+3]]: [B1.[[#TSIMD+2]]] = [B1.[[#TSIMD+1]]]
+// CHECK-NEXT:  [[#TSIMD+4]]: cond
+// CHECK-NEXT:  [[#TSIMD+5]]: [B1.[[#TSIMD+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TSIMD+6]]: [B1.[[#TSIMD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TSIMD+7]]: #pragma omp target simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.101];
-#pragma omp target simd
+// CHECK-NEXT:        [B1.[[#TSIMD+3]]];
+#pragma omp target simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 103: x
-// CHECK-NEXT: 104: [B1.103] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 105: argc
-// CHECK-NEXT: 106: [B1.105] = [B1.104]
-// CHECK-NEXT: 107: #pragma omp target teams distribute
+// CHECK-NEXT:  [[#TTD:]]: x
+// CHECK-NEXT:  [[#TTD+1]]: [B1.[[#TTD]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTD+2]]: argc
+// CHECK-NEXT:  [[#TTD+3]]: [B1.[[#TTD+2]]] = [B1.[[#TTD+1]]]
+// CHECK-NEXT:  [[#TTD+4]]: cond
+// CHECK-NEXT:  [[#TTD+5]]: [B1.[[#TTD+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTD+6]]: [B1.[[#TTD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TTD+7]]: #pragma omp target teams distribute if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.106];
-#pragma omp target teams distribute
+// CHECK-NEXT:        [B1.[[#TTD+3]]];
+#pragma omp target teams distribute if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 108: x
-// CHECK-NEXT: 109: [B1.108] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 110: argc
-// CHECK-NEXT: 111: [B1.110] = [B1.109]
-// CHECK-NEXT: 112: #pragma omp target teams distribute parallel for
+// CHECK-NEXT:  [[#TTDPF:]]: x
+// CHECK-NEXT:  [[#TTDPF+1]]: [B1.[[#TTDPF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDPF+2]]: argc
+// CHECK-NEXT:  [[#TTDPF+3]]: [B1.[[#TTDPF+2]]] = [B1.[[#TTDPF+1]]]
+// CHECK-NEXT:  [[#TTDPF+4]]: cond
+// CHECK-NEXT:  [[#TTDPF+5]]: [B1.[[#TTDPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDPF+6]]: [B1.[[#TTDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TTDPF+7]]: #pragma omp target teams distribute parallel for if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.111];
-#pragma omp target teams distribute parallel for
+// CHECK-NEXT:        [B1.[[#TTDPF+3]]];
+#pragma omp target teams distribute parallel for if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 113: x
-// CHECK-NEXT: 114: [B1.113] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 115: argc
-// CHECK-NEXT: 116: [B1.115] = [B1.114]
-// CHECK-NEXT: 117: #pragma omp target teams distribute parallel for simd
+// CHECK-NEXT:  [[#TTDPFS:]]: x
+// CHECK-NEXT:  [[#TTDPFS+1]]: [B1.[[#TTDPFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDPFS+2]]: argc
+// CHECK-NEXT:  [[#TTDPFS+3]]: [B1.[[#TTDPFS+2]]] = [B1.[[#TTDPFS+1]]]
+// CHECK-NEXT:  [[#TTDPFS+4]]: cond
+// CHECK-NEXT:  [[#TTDPFS+5]]: [B1.[[#TTDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDPFS+6]]: [B1.[[#TTDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TTDPFS+7]]: #pragma omp target teams distribute parallel for simd if(parallel: cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.116];
-#pragma omp target teams distribute parallel for simd
+// CHECK-NEXT:        [B1.[[#TTDPFS+3]]];
+#pragma omp target teams distribute parallel for simd if(parallel:cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 118: x
-// CHECK-NEXT: 119: [B1.118] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 120: argc
-// CHECK-NEXT: 121: [B1.120] = [B1.119]
-// CHECK-NEXT: 122: #pragma omp target teams distribute simd
+// CHECK-NEXT:  [[#TTDS:]]: x
+// CHECK-NEXT:  [[#TTDS+1]]: [B1.[[#TTDS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDS+2]]: argc
+// CHECK-NEXT:  [[#TTDS+3]]: [B1.[[#TTDS+2]]] = [B1.[[#TTDS+1]]]
+// CHECK-NEXT:  [[#TTDS+4]]: cond
+// CHECK-NEXT:  [[#TTDS+5]]: [B1.[[#TTDS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TTDS+6]]: [B1.[[#TTDS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TTDS+7]]: #pragma omp target teams distribute simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.121];
-#pragma omp target teams distribute simd
+// CHECK-NEXT:        [B1.[[#TTDS+3]]];
+#pragma omp target teams distribute simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 123: x
-// CHECK-NEXT: 124: [B1.123] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 125: argc
-// CHECK-NEXT: 126: [B1.125] = [B1.124]
-// CHECK-NEXT: 127: #pragma omp target teams
-// CHECK-NEXT:    [B1.126];
-#pragma omp target teams
+// CHECK-NEXT:  [[#TT:]]: x
+// CHECK-NEXT:  [[#TT+1]]: [B1.[[#TT]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TT+2]]: argc
+// CHECK-NEXT:  [[#TT+3]]: [B1.[[#TT+2]]] = [B1.[[#TT+1]]]
+// CHECK-NEXT:  [[#TT+4]]: cond
+// CHECK-NEXT:  [[#TT+5]]: [B1.[[#TT+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TT+6]]: [B1.[[#TT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TT+7]]: #pragma omp target teams if(cond)
+// CHECK-NEXT:    [B1.[[#TT+3]]];
+#pragma omp target teams if(cond)
   argc = x;
-// CHECK-NEXT: 128: #pragma omp target update to(x)
-#pragma omp target update to(x)
-// CHECK-NEXT: 129: x
-// CHECK-NEXT: 130: [B1.129] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 131: argc
-// CHECK-NEXT: 132: [B1.131] = [B1.130]
+// CHECK-NEXT: [[#TU:]]: cond
+// CHECK-NEXT: [[#TU+1]]: [B1.[[#TU]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT: [[#TU+2]]: [B1.[[#TU+1]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT: [[#TU+3]]: #pragma omp target update to(x) if(target update: cond)
+#pragma omp target update to(x) if(target update:cond)
+// CHECK-NEXT:  [[#TASK:]]: x
+// CHECK-NEXT:  [[#TASK+1]]: [B1.[[#TASK]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TASK+2]]: argc
+// CHECK-NEXT:  [[#TASK+3]]: [B1.[[#TASK+2]]] = [B1.[[#TASK+1]]]
+// CHECK-NEXT:  [[#TASK+4]]: cond
+// CHECK-NEXT:  [[#TASK+5]]: [B1.[[#TASK+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TASK+6]]: [B1.[[#TASK+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TASK+7]]: #pragma omp task if(cond)
+// CHECK-NEXT:    [B1.[[#TASK+3]]];
+#pragma omp task if(cond)
   argc = x;
-// CHECK-NEXT: 133: x
-// CHECK-NEXT: 134: [B1.133] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 135: argc
-// CHECK-NEXT: 136: [B1.135] = [B1.134]
-// CHECK-NEXT: 137: #pragma omp task
-// CHECK-NEXT:    [B1.136];
-#pragma omp task
-  argc = x;
-// CHECK-NEXT: 138: x
-// CHECK-NEXT: 139: [B1.138] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 140: argc
-// CHECK-NEXT: 141: [B1.140] = [B1.139]
-// CHECK-NEXT: 142: #pragma omp taskgroup
-// CHECK-NEXT:    [B1.141];
+// CHECK-NEXT:  [[#TG:]]: x
+// CHECK-NEXT:  [[#TG+1]]: [B1.[[#TG]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TG+2]]: argc
+// CHECK-NEXT:  [[#TG+3]]: [B1.[[#TG+2]]] = [B1.[[#TG+1]]]
+// CHECK-NEXT:  [[#TG+4]]: #pragma omp taskgroup
+// CHECK-NEXT:    [B1.[[#TG+3]]];
 #pragma omp taskgroup
   argc = x;
-// CHECK-NEXT: 143: x
-// CHECK-NEXT: 144: [B1.143] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 145: argc
-// CHECK-NEXT: 146: [B1.145] = [B1.144]
-// CHECK-NEXT: 147: #pragma omp taskloop
+// CHECK-NEXT:  [[#TL:]]: x
+// CHECK-NEXT:  [[#TL+1]]: [B1.[[#TL]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TL+2]]: argc
+// CHECK-NEXT:  [[#TL+3]]: [B1.[[#TL+2]]] = [B1.[[#TL+1]]]
+// CHECK-NEXT:  [[#TL+4]]: cond
+// CHECK-NEXT:  [[#TL+5]]: [B1.[[#TL+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TL+6]]: [B1.[[#TL+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TL+7]]: #pragma omp taskloop if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.146];
-#pragma omp taskloop
+// CHECK-NEXT:        [B1.[[#TL+3]]];
+#pragma omp taskloop if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 148: x
-// CHECK-NEXT: 149: [B1.148] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 150: argc
-// CHECK-NEXT: 151: [B1.150] = [B1.149]
-// CHECK-NEXT: 152: #pragma omp taskloop simd
+// CHECK-NEXT:  [[#TLS:]]: x
+// CHECK-NEXT:  [[#TLS+1]]: [B1.[[#TLS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TLS+2]]: argc
+// CHECK-NEXT:  [[#TLS+3]]: [B1.[[#TLS+2]]] = [B1.[[#TLS+1]]]
+// CHECK-NEXT:  [[#TLS+4]]: cond
+// CHECK-NEXT:  [[#TLS+5]]: [B1.[[#TLS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TLS+6]]: [B1.[[#TLS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TLS+7]]: #pragma omp taskloop simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.151];
-#pragma omp taskloop simd
+// CHECK-NEXT:        [B1.[[#TLS+3]]];
+#pragma omp taskloop simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT: 153: x
-// CHECK-NEXT: 154: [B1.153] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 155: argc
-// CHECK-NEXT: 156: [B1.155] = [B1.154]
-// CHECK-NEXT: 157: #pragma omp teams distribute parallel for
+// CHECK-NEXT:  [[#TDPF:]]: x
+// CHECK-NEXT:  [[#TDPF+1]]: [B1.[[#TDPF]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDPF+2]]: argc
+// CHECK-NEXT:  [[#TDPF+3]]: [B1.[[#TDPF+2]]] = [B1.[[#TDPF+1]]]
+// CHECK-NEXT:  [[#TDPF+4]]: cond
+// CHECK-NEXT:  [[#TDPF+5]]: [B1.[[#TDPF+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDPF+6]]: [B1.[[#TDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TDPF+7]]: #pragma omp teams distribute parallel for if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.156];
-// CHECK-NEXT: 158: #pragma omp target
+// CHECK-NEXT:        [B1.[[#TDPF+3]]];
+// CHECK-NEXT:  [[#TDPF+8]]: #pragma omp target
 #pragma omp target
-#pragma omp teams distribute parallel for
+#pragma omp teams distribute parallel for if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:[B1.157] 159: x
-// CHECK-NEXT: 160: [B1.159] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 161: argc
-// CHECK-NEXT: 162: [B1.161] = [B1.160]
-// CHECK-NEXT: 163: #pragma omp teams distribute parallel for simd
+// CHECK-NEXT:  [B1.[[#TDPF+7]]] [[#TDPFS:]]: x
+// CHECK-NEXT:  [[#TDPFS+1]]: [B1.[[#TDPFS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDPFS+2]]: argc
+// CHECK-NEXT:  [[#TDPFS+3]]: [B1.[[#TDPFS+2]]] = [B1.[[#TDPFS+1]]]
+// CHECK-NEXT:  [[#TDPFS+4]]: cond
+// CHECK-NEXT:  [[#TDPFS+5]]: [B1.[[#TDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDPFS+6]]: [B1.[[#TDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool)
+// CHECK-NEXT:  [[#TDPFS+7]]: #pragma omp teams distribute parallel for simd if(cond)
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.162];
-// CHECK-NEXT: 164: #pragma omp target
+// CHECK-NEXT:        [B1.[[#TDPFS+3]]];
+// CHECK-NEXT:  [[#TDPFS+8]]: #pragma omp target
 #pragma omp target
-#pragma omp teams distribute parallel for simd
+#pragma omp teams distribute parallel for simd if(cond)
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:[B1.163] 165: x
-// CHECK-NEXT: 166: [B1.165] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 167: argc
-// CHECK-NEXT: 168: [B1.167] = [B1.166]
-// CHECK-NEXT: 169: #pragma omp teams distribute simd
+// CHECK-NEXT:  [B1.[[#TDPFS+7]]] [[#TDS:]]: x
+// CHECK-NEXT:  [[#TDS+1]]: [B1.[[#TDS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TDS+2]]: argc
+// CHECK-NEXT:  [[#TDS+3]]: [B1.[[#TDS+2]]] = [B1.[[#TDS+1]]]
+// CHECK-NEXT:  [[#TDS+4]]: #pragma omp teams distribute simd
 // CHECK-NEXT:    for (int i = 0; i < 10; ++i)
-// CHECK-NEXT:        [B1.168];
-// CHECK-NEXT: 170: #pragma omp target
+// CHECK-NEXT:        [B1.[[#TDS+3]]];
+// CHECK-NEXT:  [[#TDS+5]]: #pragma omp target
 #pragma omp target
 #pragma omp teams distribute simd
   for (int i = 0; i < 10; ++i)
     argc = x;
-// CHECK-NEXT:[B1.169] 171: x
-// CHECK-NEXT: 172: [B1.171] (ImplicitCastExpr, LValueToRValue, int)
-// CHECK-NEXT: 173: argc
-// CHECK-NEXT: 174: [B1.173] = [B1.172]
-// CHECK-NEXT: 175: #pragma omp teams
-// CHECK-NEXT:    [B1.174];
-// CHECK-NEXT: 176: #pragma omp target
+// CHECK-NEXT:  [B1.[[#TDS+4]]] [[#TEAMS:]]: x
+// CHECK-NEXT:  [[#TEAMS+1]]: [B1.[[#TEAMS]]] (ImplicitCastExpr, LValueToRValue, int)
+// CHECK-NEXT:  [[#TEAMS+2]]: argc
+// CHECK-NEXT:  [[#TEAMS+3]]: [B1.[[#TEAMS+2]]] = [B1.[[#TEAMS+1]]]
+// CHECK-NEXT:  [[#TEAMS+4]]: #pragma omp teams
+// CHECK-NEXT:    [B1.[[#TEAMS+3]]];
+// CHECK-NEXT:  [[#TEAMS+5]]: #pragma omp target
 #pragma omp target
 #pragma omp teams
   argc = x;
-// CHECK-NEXT:[B1.175]   Preds
+// CHECK-NEXT:  [B1.[[#TEAMS+4]]]   Preds
 }
 
diff --git a/clang/test/OpenMP/cancel_if_messages.cpp b/clang/test/OpenMP/cancel_if_messages.cpp
index 3d629c927e907..222087ca9e61b 100644
--- a/clang/test/OpenMP/cancel_if_messages.cpp
+++ b/clang/test/OpenMP/cancel_if_messages.cpp
@@ -9,6 +9,16 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel
+  {
+#pragma omp cancel parallel if (cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+    for (int i = 0; i < 10; ++i)
+      ;
+  }
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
index a06ff2377c043..e628a15c3ab44 100644
--- a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
index 7769272026e6b..6cf18faf0a87f 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_for_if_messages.cpp b/clang/test/OpenMP/parallel_for_if_messages.cpp
index 32f9ef3a7defa..56bb06be0cc71 100644
--- a/clang/test/OpenMP/parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
index aa1e302d04242..bab9339d49174 100644
--- a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_if_messages.cpp b/clang/test/OpenMP/parallel_if_messages.cpp
index 7f802a9e4236a..f095e66bbfa5e 100644
--- a/clang/test/OpenMP/parallel_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/parallel_sections_if_messages.cpp b/clang/test/OpenMP/parallel_sections_if_messages.cpp
index 8d36b6d5d3086..b7c92df4f30df 100644
--- a/clang/test/OpenMP/parallel_sections_if_messages.cpp
+++ b/clang/test/OpenMP/parallel_sections_if_messages.cpp
@@ -9,6 +9,14 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp parallel sections if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  {
+    ;
+  }
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_data_if_messages.cpp b/clang/test/OpenMP/target_data_if_messages.cpp
index c6f9b4b34eeea..29f898c6d9fa7 100644
--- a/clang/test/OpenMP/target_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_data_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target data map(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_enter_data_if_messages.cpp b/clang/test/OpenMP/target_enter_data_if_messages.cpp
index 5123d607dc6a1..21019e9ae7f8c 100644
--- a/clang/test/OpenMP/target_enter_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_enter_data_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target enter data map(to:argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_exit_data_if_messages.cpp b/clang/test/OpenMP/target_exit_data_if_messages.cpp
index c45b32ff3fe75..7b2385c16cd21 100644
--- a/clang/test/OpenMP/target_exit_data_if_messages.cpp
+++ b/clang/test/OpenMP/target_exit_data_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target exit data map(from: argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 int main(int argc, char **argv) {
diff --git a/clang/test/OpenMP/target_if_messages.cpp b/clang/test/OpenMP/target_if_messages.cpp
index e6b667f2cffbf..f381e9eb91ebd 100644
--- a/clang/test/OpenMP/target_if_messages.cpp
+++ b/clang/test/OpenMP/target_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_if_messages.cpp
index 445dc1775b0f2..a5a181b9d273a 100644
--- a/clang/test/OpenMP/target_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
index b0da8017019f0..ef9a2089d1087 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target parallel for simd if(parallel: cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_parallel_if_messages.cpp b/clang/test/OpenMP/target_parallel_if_messages.cpp
index 460e0c8655f09..ac498a7108b0d 100644
--- a/clang/test/OpenMP/target_parallel_if_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_simd_if_messages.cpp b/clang/test/OpenMP/target_simd_if_messages.cpp
index 94d2ab308daa2..5f3e9e3910ac6 100644
--- a/clang/test/OpenMP/target_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
index fd1ffb08cbe8c..499cd3ac58050 100644
--- a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams distribute if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
index e1114028b6877..6df23076472ec 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
index 59c75893a1714..e88c1f1dbbfff 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp
@@ -9,6 +9,14 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams distribute parallel for simd if (parallel \
+                                                          : cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
index 7134a8394cbb8..53af6e759d21e 100644
--- a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams distribute simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_teams_if_messages.cpp b/clang/test/OpenMP/target_teams_if_messages.cpp
index 8d3d690d631fa..4bc82a349398d 100644
--- a/clang/test/OpenMP/target_teams_if_messages.cpp
+++ b/clang/test/OpenMP/target_teams_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target teams if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/target_update_if_messages.cpp b/clang/test/OpenMP/target_update_if_messages.cpp
index 9ded332b04eb9..d967713e456fb 100644
--- a/clang/test/OpenMP/target_update_if_messages.cpp
+++ b/clang/test/OpenMP/target_update_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target update to(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/task_if_messages.cpp b/clang/test/OpenMP/task_if_messages.cpp
index 305af22149d85..2d47b32b9a153 100644
--- a/clang/test/OpenMP/task_if_messages.cpp
+++ b/clang/test/OpenMP/task_if_messages.cpp
@@ -9,6 +9,13 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp task if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
index 6f724b050178a..b76599d41a46a 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp
@@ -9,6 +9,14 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target
+#pragma omp teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
index c01e6e87e39a5..39a0b326383a2 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp
@@ -9,6 +9,14 @@ bool foobool(int argc) {
   return argc;
 }
 
+void xxx(int argc) {
+  int cond; // expected-note {{initialize the variable 'cond' to silence this warning}}
+#pragma omp target
+#pragma omp teams distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
 struct S1; // expected-note {{declared here}}
 
 template <class T, class S> // expected-note {{declared here}}

From 8ece3b6719948a08d1f654d97f53dbd08891199d Mon Sep 17 00:00:00 2001
From: Neil Hickey <neil.hickey@arm.com>
Date: Tue, 16 Jul 2019 14:57:32 +0000
Subject: [PATCH 239/451] [OpenCL] Fixing sampler initialisations for C++ mode.

Allow conversions between integer and sampler type.

Differential Revision: https://reviews.llvm.org/D64791

llvm-svn: 366212
---
 clang/lib/Sema/SemaInit.cpp         |  6 +++---
 clang/lib/Sema/SemaOverload.cpp     |  4 ++++
 clang/test/CodeGenOpenCL/sampler.cl | 19 ++++++++++---------
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index b3b34699eb64f..bc1069609336c 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -5640,6 +5640,9 @@ void InitializationSequence::InitializeFrom(Sema &S,
   bool allowObjCWritebackConversion = S.getLangOpts().ObjCAutoRefCount &&
          Entity.isParameterKind();
 
+  if (TryOCLSamplerInitialization(S, *this, DestType, Initializer))
+    return;
+
   // We're at the end of the line for C: it's either a write-back conversion
   // or it's a C assignment. There's no need to check anything else.
   if (!S.getLangOpts().CPlusPlus) {
@@ -5649,9 +5652,6 @@ void InitializationSequence::InitializeFrom(Sema &S,
       return;
     }
 
-    if (TryOCLSamplerInitialization(S, *this, DestType, Initializer))
-      return;
-
     if (TryOCLZeroOpaqueTypeInitialization(S, *this, DestType, Initializer))
       return;
 
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 77e6767c2b814..d8c4ea48ebce7 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1851,6 +1851,10 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType,
              (From->EvaluateKnownConstInt(S.getASTContext()) == 0)) {
     SCS.Second = ICK_Zero_Queue_Conversion;
     FromType = ToType;
+  } else if (ToType->isSamplerT() &&
+             From->isIntegerConstantExpr(S.getASTContext())) {
+    SCS.Second = ICK_Compatible_Conversion;
+    FromType = ToType;
   } else {
     // No second conversion required.
     SCS.Second = ICK_Identity;
diff --git a/clang/test/CodeGenOpenCL/sampler.cl b/clang/test/CodeGenOpenCL/sampler.cl
index 74b6d55d5d37e..1ef1f538b2562 100644
--- a/clang/test/CodeGenOpenCL/sampler.cl
+++ b/clang/test/CodeGenOpenCL/sampler.cl
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=c++ -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s
 //
 // This test covers 5 cases of sampler initialzation:
 //   1. function argument passing
@@ -29,7 +30,7 @@ const sampler_t glb_smp_const = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORD
 int get_sampler_initializer(void);
 
 void fnc4smp(sampler_t s) {}
-// CHECK: define spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* %
+// CHECK: define spir_func void [[FUNCNAME:@.*fnc4smp.*]](%opencl.sampler_t addrspace(2)* %
 
 kernel void foo(sampler_t smp_par) {
   // CHECK-LABEL: define spir_kernel void @foo(%opencl.sampler_t addrspace(2)* %smp_par)
@@ -45,32 +46,32 @@ kernel void foo(sampler_t smp_par) {
   fnc4smp(smp);
   // CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   // Case 1b
   fnc4smp(smp);
   // CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19)
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]]
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   // Case 1a/2a
   fnc4smp(glb_smp);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   // Case 1a/2c
   fnc4smp(glb_smp_const);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   // Case 1c
   fnc4smp(smp_par);
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_par_ptr]]
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   fnc4smp(5);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5)
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   const sampler_t const_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
   fnc4smp(const_smp);
@@ -78,12 +79,12 @@ kernel void foo(sampler_t smp_par) {
   // CHECK: store %opencl.sampler_t addrspace(2)* [[CONST_SAMP]], %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR:%[a-zA-Z0-9]+]]
   fnc4smp(const_smp);
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR]]
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   constant sampler_t constant_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
   fnc4smp(constant_smp);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35)
-  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]])
+  // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]])
 
   // TODO: enable sampler initialization with non-constant integer.
   //const sampler_t const_smp_func_init = get_sampler_initializer();

From d3941e663066eb120ea8b79983122897d503d1c2 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 16 Jul 2019 15:14:01 +0000
Subject: [PATCH 240/451] [SWIG] Deprecate SWIG 1.x

The last swig 1.x release dates from 2009, now 10 years ago. Recently, I
fixed an issue that prevented us from using swig 4 (r364974), which
turned out to be not backward compatible with swig 1.x (r365718).

This patch deprecates this (really old) version of swig and makes swig 2
the minimum supported version in LLDB. This should be fine for the
build bots, which are all running swig 3 or later.

Differential revision: https://reviews.llvm.org/D64782

llvm-svn: 366213
---
 lldb/scripts/CMakeLists.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lldb/scripts/CMakeLists.txt b/lldb/scripts/CMakeLists.txt
index 439852793f18a..1a0ea96ff9f65 100644
--- a/lldb/scripts/CMakeLists.txt
+++ b/lldb/scripts/CMakeLists.txt
@@ -14,6 +14,11 @@ if(LLDB_BUILD_FRAMEWORK)
 endif()
 
 find_package(SWIG REQUIRED)
+set(SWIG_MIN_VERSION "2.0.0")
+if (${SWIG_VERSION} VERSION_LESS ${SWIG_MIN_VERSION})
+  message(FATAL_ERROR "LLDB requires swig ${SWIG_MIN_VERSION}, your version is ${SWIG_VERSION}.")
+endif()
+
 add_custom_command(
   OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/LLDBWrapPython.cpp
   OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lldb.py

From f34a69c2e2792f9702f14cc64723ff89294ba911 Mon Sep 17 00:00:00 2001
From: Amaury Sechet <deadalnix@gmail.com>
Date: Tue, 16 Jul 2019 15:17:00 +0000
Subject: [PATCH 241/451] [DAGCombiner] fold (addcarry (xor a, -1), b, c) ->
 (subcarry b, a, !c) and flip carry.

Summary:
As per title. DAGCombiner only matches the special case where b = 0; this patch extends the pattern to match any value of b.

Depends on D57302

Reviewers: hfinkel, RKSimon, craig.topper

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59208

llvm-svn: 366214
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 44 ++++++++++++-------
 llvm/test/CodeGen/X86/addcarry.ll             |  5 +--
 llvm/test/CodeGen/X86/subcarry.ll             | 30 +++++--------
 3 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 93b87fbe026e9..49c922f560faf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2706,7 +2706,19 @@ static SDValue flipBoolean(SDValue V, const SDLoc &DL,
   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
 }
 
-static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
+/**
+ * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
+ * then the flip also occurs if computing the inverse is the same cost.
+ * This function returns an empty SDValue in case it cannot flip the boolean
+ * without increasing the cost of the computation. If you want to flip a boolean
+ * no matter what, use flipBoolean.
+ */
+static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
+                                  const TargetLowering &TLI,
+                                  bool Force) {
+  if (Force && isa<ConstantSDNode>(V))
+    return flipBoolean(V, SDLoc(V), DAG, TLI);
+
   if (V.getOpcode() != ISD::XOR)
     return SDValue();
 
@@ -2731,6 +2743,8 @@ static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
 
   if (IsFlip)
     return V.getOperand(0);
+  if (Force)
+    return flipBoolean(V, SDLoc(V), DAG, TLI);
   return SDValue();
 }
 
@@ -2843,11 +2857,10 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
   }
 
-  EVT CarryVT = CarryIn.getValueType();
-
   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
   if (isNullConstant(N0) && isNullConstant(N1)) {
     EVT VT = N0.getValueType();
+    EVT CarryVT = CarryIn.getValueType();
     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
     AddToWorklist(CarryExt.getNode());
     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2855,17 +2868,6 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
                      DAG.getConstant(0, DL, CarryVT));
   }
 
-  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
-  if (isBitwiseNot(N0) && isNullConstant(N1)) {
-    if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
-      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
-                                DAG.getConstant(0, DL, N0.getValueType()),
-                                N0.getOperand(0), B);
-      return CombineTo(N, Sub,
-                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
-    }
-  }
-
   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
     return Combined;
 
@@ -2964,6 +2966,16 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
 
 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                        SDNode *N) {
+  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+  if (isBitwiseNot(N0))
+    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
+      SDLoc DL(N);
+      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+                                N0.getOperand(0), NotC);
+      return CombineTo(N, Sub,
+                       flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+    }
+
   // Iff the flag result is dead:
   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
@@ -8302,7 +8314,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   }
 
   // select (not Cond), N1, N2 -> select Cond, N2, N1
-  if (SDValue F = extractBooleanFlip(N0, TLI)) {
+  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
     SelectOp->setFlags(Flags);
     return SelectOp;
@@ -8797,7 +8809,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
     return V;
 
   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
-  if (SDValue F = extractBooleanFlip(N0, TLI))
+  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
     return DAG.getSelect(DL, VT, F, N2, N1);
 
   // Canonicalize integer abs.
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index a085108d0b209..6c70fee99090b 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -391,11 +391,10 @@ define i128 @addcarry_to_subcarry(i64 %a, i64 %b) {
 ; CHECK-LABEL: addcarry_to_subcarry:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    cmpq %rsi, %rdi
 ; CHECK-NEXT:    notq %rsi
-; CHECK-NEXT:    movb $1, %cl
+; CHECK-NEXT:    setae %cl
 ; CHECK-NEXT:    addb $-1, %cl
-; CHECK-NEXT:    movq %rdi, %rcx
-; CHECK-NEXT:    adcq %rsi, %rcx
 ; CHECK-NEXT:    adcq $0, %rax
 ; CHECK-NEXT:    setb %cl
 ; CHECK-NEXT:    movzbl %cl, %edx
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index 78ae3297ca3b8..449391616aa9d 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -90,37 +90,29 @@ entry:
 define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr {
 ; CHECK-LABEL: sub:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    movq (%rsi), %r10
 ; CHECK-NEXT:    movq 8(%rsi), %rdi
-; CHECK-NEXT:    movq %r10, %r11
-; CHECK-NEXT:    subq %rdx, %r11
-; CHECK-NEXT:    notq %rdx
-; CHECK-NEXT:    movb $1, %bl
-; CHECK-NEXT:    addb $-1, %bl
-; CHECK-NEXT:    adcq %r10, %rdx
+; CHECK-NEXT:    subq %rdx, %r10
+; CHECK-NEXT:    setae %dl
+; CHECK-NEXT:    addb $-1, %dl
 ; CHECK-NEXT:    adcq $0, %rdi
 ; CHECK-NEXT:    setb %dl
-; CHECK-NEXT:    movzbl %dl, %edx
+; CHECK-NEXT:    movzbl %dl, %r11d
 ; CHECK-NEXT:    notq %rcx
 ; CHECK-NEXT:    addq %rdi, %rcx
-; CHECK-NEXT:    adcq 16(%rsi), %rdx
-; CHECK-NEXT:    setb %bl
-; CHECK-NEXT:    movzbl %bl, %edi
+; CHECK-NEXT:    adcq 16(%rsi), %r11
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    movzbl %dl, %edx
 ; CHECK-NEXT:    notq %r8
-; CHECK-NEXT:    addq %rdx, %r8
-; CHECK-NEXT:    adcq 24(%rsi), %rdi
+; CHECK-NEXT:    addq %r11, %r8
+; CHECK-NEXT:    adcq 24(%rsi), %rdx
 ; CHECK-NEXT:    notq %r9
-; CHECK-NEXT:    addq %rdi, %r9
-; CHECK-NEXT:    movq %r11, (%rax)
+; CHECK-NEXT:    addq %rdx, %r9
+; CHECK-NEXT:    movq %r10, (%rax)
 ; CHECK-NEXT:    movq %rcx, 8(%rax)
 ; CHECK-NEXT:    movq %r8, 16(%rax)
 ; CHECK-NEXT:    movq %r9, 24(%rax)
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq
 entry:
   %0 = extractvalue %S %arg.b, 0

From 228a7b4f2a3575da642017a3b55062488e710d46 Mon Sep 17 00:00:00 2001
From: Amara Emerson <aemerson@apple.com>
Date: Tue, 16 Jul 2019 15:23:10 +0000
Subject: [PATCH 242/451] [ADCE] Fix non-deterministic behaviour due to
 iterating over a pointer set.

Original patch by Yann Laigle-Chapuy

Differential Revision: https://reviews.llvm.org/D64785

llvm-svn: 366215
---
 llvm/lib/Transforms/Scalar/ADCE.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 8dcf6393f4602..7f7460c5746a7 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -135,7 +136,7 @@ class AggressiveDeadCodeElimination {
   SmallPtrSet<const Metadata *, 32> AliveScopes;
 
   /// Set of blocks with not known to have live terminators.
-  SmallPtrSet<BasicBlock *, 16> BlocksWithDeadTerminators;
+  SmallSetVector<BasicBlock *, 16> BlocksWithDeadTerminators;
 
   /// The set of blocks which we have determined whose control
   /// dependence sources must be live and which have not had
@@ -389,7 +390,7 @@ void AggressiveDeadCodeElimination::markLive(Instruction *I) {
   // Mark the containing block live
   auto &BBInfo = *Info.Block;
   if (BBInfo.Terminator == I) {
-    BlocksWithDeadTerminators.erase(BBInfo.BB);
+    BlocksWithDeadTerminators.remove(BBInfo.BB);
     // For live terminators, mark destination blocks
     // live to preserve this control flow edges.
     if (!BBInfo.UnconditionalBranch)
@@ -478,10 +479,14 @@ void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() {
   // which currently have dead terminators that are control
   // dependence sources of a block which is in NewLiveBlocks.
 
+  const SmallPtrSet<BasicBlock *, 16> BWDT{
+      BlocksWithDeadTerminators.begin(),
+      BlocksWithDeadTerminators.end()
+  };
   SmallVector<BasicBlock *, 32> IDFBlocks;
   ReverseIDFCalculator IDFs(PDT);
   IDFs.setDefiningBlocks(NewLiveBlocks);
-  IDFs.setLiveInBlocks(BlocksWithDeadTerminators);
+  IDFs.setLiveInBlocks(BWDT);
   IDFs.calculate(IDFBlocks);
   NewLiveBlocks.clear();
 

From cc909812a39d26ba4bcc8aaa49096155802c4521 Mon Sep 17 00:00:00 2001
From: Francis Visoiu Mistrih <francisvm@yahoo.com>
Date: Tue, 16 Jul 2019 15:24:59 +0000
Subject: [PATCH 243/451] [Remarks][NFC] Combine ParserFormat and
 SerializerFormat

It's useless to have both.

llvm-svn: 366216
---
 llvm/include/llvm/IR/RemarkStreamer.h         |  4 ---
 llvm/include/llvm/Remarks/Remark.h            |  1 -
 llvm/include/llvm/Remarks/RemarkFormat.h      | 33 +++++++++++++++++++
 llvm/include/llvm/Remarks/RemarkParser.h      |  7 ++--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  1 +
 llvm/lib/IR/RemarkStreamer.cpp                | 22 +++----------
 llvm/lib/Remarks/CMakeLists.txt               |  1 +
 llvm/lib/Remarks/RemarkFormat.cpp             | 30 +++++++++++++++++
 llvm/lib/Remarks/RemarkParser.cpp             | 33 ++++++++++---------
 llvm/lib/Remarks/RemarkParserImpl.h           |  4 +--
 llvm/lib/Remarks/YAMLRemarkParser.h           |  4 +--
 llvm/tools/llvm-opt-report/OptReport.cpp      |  2 +-
 .../Remarks/YAMLRemarksParsingTest.cpp        | 10 +++---
 13 files changed, 100 insertions(+), 52 deletions(-)
 create mode 100644 llvm/include/llvm/Remarks/RemarkFormat.h
 create mode 100644 llvm/lib/Remarks/RemarkFormat.cpp

diff --git a/llvm/include/llvm/IR/RemarkStreamer.h b/llvm/include/llvm/IR/RemarkStreamer.h
index 9b6d82ee30c0c..c84de9aea3519 100644
--- a/llvm/include/llvm/IR/RemarkStreamer.h
+++ b/llvm/include/llvm/IR/RemarkStreamer.h
@@ -90,10 +90,6 @@ struct RemarkSetupFormatError : RemarkSetupErrorInfo<RemarkSetupFormatError> {
   using RemarkSetupErrorInfo<RemarkSetupFormatError>::RemarkSetupErrorInfo;
 };
 
-enum class RemarksSerializerFormat { Unknown, YAML };
-
-Expected<RemarksSerializerFormat> parseSerializerFormat(StringRef Format);
-
 /// Setup optimization remarks.
 Expected<std::unique_ptr<ToolOutputFile>>
 setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h
index d916728e0b9d2..4241fb1fda3b3 100644
--- a/llvm/include/llvm/Remarks/Remark.h
+++ b/llvm/include/llvm/Remarks/Remark.h
@@ -24,7 +24,6 @@ namespace llvm {
 namespace remarks {
 
 constexpr uint64_t Version = 0;
-constexpr StringRef Magic("REMARKS", 7);
 
 /// The debug location used to track a remark back to the source file.
 struct RemarkLocation {
diff --git a/llvm/include/llvm/Remarks/RemarkFormat.h b/llvm/include/llvm/Remarks/RemarkFormat.h
new file mode 100644
index 0000000000000..e167d99d25172
--- /dev/null
+++ b/llvm/include/llvm/Remarks/RemarkFormat.h
@@ -0,0 +1,33 @@
+//===-- llvm/Remarks/RemarkFormat.h - The format of remarks -----*- C++/-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities to deal with the format of remarks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REMARKS_REMARK_FORMAT_H
+#define LLVM_REMARKS_REMARK_FORMAT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace remarks {
+
+constexpr StringRef Magic("REMARKS", 7);
+
+/// The format used for serializing/deserializing remarks.
+enum class Format { Unknown, YAML };
+
+/// Parse and validate a string for the remark format.
+Expected<Format> parseFormat(StringRef FormatStr);
+
+} // end namespace remarks
+} // end namespace llvm
+
+#endif /* LLVM_REMARKS_REMARK_FORMAT_H */
diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h
index 457b2fbaa5f5d..b956f0c40250c 100644
--- a/llvm/include/llvm/Remarks/RemarkParser.h
+++ b/llvm/include/llvm/Remarks/RemarkParser.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
 #include "llvm/Support/Error.h"
 #include <memory>
 
@@ -25,8 +26,6 @@ namespace remarks {
 struct ParserImpl;
 struct ParsedStringTable;
 
-enum class ParserFormat { YAML };
-
 /// Parser used to parse a raw buffer to remarks::Remark objects.
 struct Parser {
   /// The hidden implementation of the parser.
@@ -35,11 +34,11 @@ struct Parser {
   /// Create a parser parsing \p Buffer to Remark objects.
   /// This constructor should be only used for parsing remarks without a string
   /// table.
-  Parser(ParserFormat Format, StringRef Buffer);
+  Parser(Format ParserFormat, StringRef Buffer);
 
   /// Create a parser parsing \p Buffer to Remark objects, using \p StrTab as a
   /// string table.
-  Parser(ParserFormat Format, StringRef Buffer,
+  Parser(Format ParserFormat, StringRef Buffer,
          const ParsedStringTable &StrTab);
 
   // Needed because ParserImpl is an incomplete type.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 174a9bcfd9b2c..54f6cc2d5571a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -100,6 +100,7 @@
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Pass.h"
 #include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
 #include "llvm/Remarks/RemarkStringTable.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/IR/RemarkStreamer.cpp b/llvm/lib/IR/RemarkStreamer.cpp
index 2c3bc8406e55e..32adef181f429 100644
--- a/llvm/lib/IR/RemarkStreamer.cpp
+++ b/llvm/lib/IR/RemarkStreamer.cpp
@@ -15,6 +15,7 @@
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/Remarks/RemarkFormat.h"
 
 using namespace llvm;
 
@@ -112,30 +113,16 @@ char RemarkSetupPatternError::ID = 0;
 char RemarkSetupFormatError::ID = 0;
 
 static std::unique_ptr<remarks::Serializer>
-formatToSerializer(RemarksSerializerFormat RemarksFormat, raw_ostream &OS) {
+formatToSerializer(remarks::Format RemarksFormat, raw_ostream &OS) {
   switch (RemarksFormat) {
   default:
     llvm_unreachable("Unknown remark serializer format.");
     return nullptr;
-  case RemarksSerializerFormat::YAML:
+  case remarks::Format::YAML:
     return llvm::make_unique<remarks::YAMLSerializer>(OS);
   };
 }
 
-Expected<RemarksSerializerFormat>
-llvm::parseSerializerFormat(StringRef StrFormat) {
-  auto Format = StringSwitch<RemarksSerializerFormat>(StrFormat)
-                    .Cases("", "yaml", RemarksSerializerFormat::YAML)
-                    .Default(RemarksSerializerFormat::Unknown);
-
-  if (Format == RemarksSerializerFormat::Unknown)
-    return createStringError(std::make_error_code(std::errc::invalid_argument),
-                             "Unknown remark serializer format: '%s'",
-                             StrFormat.data());
-
-  return Format;
-}
-
 Expected<std::unique_ptr<ToolOutputFile>>
 llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
                                StringRef RemarksPasses, StringRef RemarksFormat,
@@ -158,8 +145,7 @@ llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
   if (EC)
     return make_error<RemarkSetupFileError>(errorCodeToError(EC));
 
-  Expected<RemarksSerializerFormat> Format =
-      parseSerializerFormat(RemarksFormat);
+  Expected<remarks::Format> Format = remarks::parseFormat(RemarksFormat);
   if (Error E = Format.takeError())
     return make_error<RemarkSetupFormatError>(std::move(E));
 
diff --git a/llvm/lib/Remarks/CMakeLists.txt b/llvm/lib/Remarks/CMakeLists.txt
index 73383597accd5..06ddbab6de5a3 100644
--- a/llvm/lib/Remarks/CMakeLists.txt
+++ b/llvm/lib/Remarks/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMRemarks
   Remark.cpp
+  RemarkFormat.cpp
   RemarkParser.cpp
   RemarkStringTable.cpp
   YAMLRemarkParser.cpp
diff --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp
new file mode 100644
index 0000000000000..bcd0f753ff64f
--- /dev/null
+++ b/llvm/lib/Remarks/RemarkFormat.cpp
@@ -0,0 +1,30 @@
+//===- RemarkFormat.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of utilities to handle the different remark formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+using namespace llvm::remarks;
+
+Expected<Format> llvm::remarks::parseFormat(StringRef FormatStr) {
+  auto Result = StringSwitch<Format>(FormatStr)
+                    .Cases("", "yaml", Format::YAML)
+                    .Default(Format::Unknown);
+
+  if (Result == Format::Unknown)
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "Unknown remark serializer format: '%s'",
+                             FormatStr.data());
+
+  return Result;
+}
diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index bd83ba488d8a5..41ed64d022b74 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -20,31 +20,35 @@
 using namespace llvm;
 using namespace llvm::remarks;
 
-static std::unique_ptr<ParserImpl> formatToParserImpl(ParserFormat Format,
+static std::unique_ptr<ParserImpl> formatToParserImpl(Format ParserFormat,
                                                       StringRef Buf) {
-  switch (Format) {
-  case ParserFormat::YAML:
+  switch (ParserFormat) {
+  case Format::YAML:
     return llvm::make_unique<YAMLParserImpl>(Buf);
+  case Format::Unknown:
+    llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum");
+    return nullptr;
   };
-  llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum");
 }
 
 static std::unique_ptr<ParserImpl>
-formatToParserImpl(ParserFormat Format, StringRef Buf,
+formatToParserImpl(Format ParserFormat, StringRef Buf,
                    const ParsedStringTable &StrTab) {
-  switch (Format) {
-  case ParserFormat::YAML:
+  switch (ParserFormat) {
+  case Format::YAML:
     return llvm::make_unique<YAMLParserImpl>(Buf, &StrTab);
+  case Format::Unknown:
+    llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum");
+    return nullptr;
   };
-  llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum");
 }
 
-Parser::Parser(ParserFormat Format, StringRef Buf)
-    : Impl(formatToParserImpl(Format, Buf)) {}
+Parser::Parser(Format ParserFormat, StringRef Buf)
+    : Impl(formatToParserImpl(ParserFormat, Buf)) {}
 
-Parser::Parser(ParserFormat Format, StringRef Buf,
+Parser::Parser(Format ParserFormat, StringRef Buf,
                const ParsedStringTable &StrTab)
-    : Impl(formatToParserImpl(Format, Buf, StrTab)) {}
+    : Impl(formatToParserImpl(ParserFormat, Buf, StrTab)) {}
 
 Parser::~Parser() = default;
 
@@ -110,9 +114,8 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef)
 
 extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
                                                           uint64_t Size) {
-  return wrap(
-      new remarks::Parser(remarks::ParserFormat::YAML,
-                          StringRef(static_cast<const char *>(Buf), Size)));
+  return wrap(new remarks::Parser(
+      remarks::Format::YAML, StringRef(static_cast<const char *>(Buf), Size)));
 }
 
 static void handleYAMLError(remarks::YAMLParserImpl &Impl, Error E) {
diff --git a/llvm/lib/Remarks/RemarkParserImpl.h b/llvm/lib/Remarks/RemarkParserImpl.h
index 6b9329b1815cd..5f8c21dcdd44c 100644
--- a/llvm/lib/Remarks/RemarkParserImpl.h
+++ b/llvm/lib/Remarks/RemarkParserImpl.h
@@ -19,13 +19,13 @@ namespace llvm {
 namespace remarks {
 /// This is used as a base for any parser implementation.
 struct ParserImpl {
-  explicit ParserImpl(ParserFormat Format) : Format(Format) {}
+  explicit ParserImpl(Format ParserFormat) : ParserFormat(ParserFormat) {}
   // Virtual destructor prevents mismatched deletes
   virtual ~ParserImpl() {}
 
   // The parser format. This is used as a tag to safely cast between
   // implementations.
-  ParserFormat Format;
+  Format ParserFormat;
 };
 } // end namespace remarks
 } // end namespace llvm
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h
index 9ed18eebe7759..14698bbd3ca48 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.h
+++ b/llvm/lib/Remarks/YAMLRemarkParser.h
@@ -127,11 +127,11 @@ struct YAMLParserImpl : public ParserImpl {
 
   YAMLParserImpl(StringRef Buf,
                  Optional<const ParsedStringTable *> StrTab = None)
-      : ParserImpl{ParserFormat::YAML}, YAMLParser(Buf, StrTab),
+      : ParserImpl{Format::YAML}, YAMLParser(Buf, StrTab),
         YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {}
 
   static bool classof(const ParserImpl *PI) {
-    return PI->Format == ParserFormat::YAML;
+    return PI->ParserFormat == Format::YAML;
   }
 };
 } // end namespace remarks
diff --git a/llvm/tools/llvm-opt-report/OptReport.cpp b/llvm/tools/llvm-opt-report/OptReport.cpp
index b263d9a4fb61f..80d0b73664d01 100644
--- a/llvm/tools/llvm-opt-report/OptReport.cpp
+++ b/llvm/tools/llvm-opt-report/OptReport.cpp
@@ -150,7 +150,7 @@ static bool readLocationInfo(LocationInfoTy &LocationInfo) {
     return false;
   }
 
-  remarks::Parser Parser(remarks::ParserFormat::YAML, (*Buf)->getBuffer());
+  remarks::Parser Parser(remarks::Format::YAML, (*Buf)->getBuffer());
 
   while (true) {
     Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
diff --git a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp
index 6cca4c5ce8c21..e3c7cdf881e3c 100644
--- a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp
+++ b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp
@@ -14,7 +14,7 @@
 using namespace llvm;
 
 template <size_t N> void parseGood(const char (&Buf)[N]) {
-  remarks::Parser Parser(remarks::ParserFormat::YAML, {Buf, N - 1});
+  remarks::Parser Parser(remarks::Format::YAML, {Buf, N - 1});
   Expected<const remarks::Remark *> Remark = Parser.getNext();
   EXPECT_FALSE(errorToBool(Remark.takeError())); // Check for parsing errors.
   EXPECT_TRUE(*Remark != nullptr);               // At least one remark.
@@ -25,7 +25,7 @@ template <size_t N> void parseGood(const char (&Buf)[N]) {
 
 template <size_t N>
 bool parseExpectError(const char (&Buf)[N], const char *Error) {
-  remarks::Parser Parser(remarks::ParserFormat::YAML, {Buf, N - 1});
+  remarks::Parser Parser(remarks::Format::YAML, {Buf, N - 1});
   Expected<const remarks::Remark *> Remark = Parser.getNext();
   EXPECT_FALSE(Remark); // Expect an error here.
 
@@ -354,7 +354,7 @@ TEST(YAMLRemarks, Contents) {
                   "  - String: ' because its definition is unavailable'\n"
                   "\n";
 
-  remarks::Parser Parser(remarks::ParserFormat::YAML, Buf);
+  remarks::Parser Parser(remarks::Format::YAML, Buf);
   Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
   EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
   EXPECT_TRUE(*RemarkOrErr != nullptr);
@@ -516,7 +516,7 @@ TEST(YAMLRemarks, ContentsStrTab) {
                 115);
 
   remarks::ParsedStringTable StrTab(StrTabBuf);
-  remarks::Parser Parser(remarks::ParserFormat::YAML, Buf, StrTab);
+  remarks::Parser Parser(remarks::Format::YAML, Buf, StrTab);
   Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
   EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
   EXPECT_TRUE(*RemarkOrErr != nullptr);
@@ -584,7 +584,7 @@ TEST(YAMLRemarks, ParsingBadStringTableIndex) {
   StringRef StrTabBuf = StringRef("inline");
 
   remarks::ParsedStringTable StrTab(StrTabBuf);
-  remarks::Parser Parser(remarks::ParserFormat::YAML, Buf, StrTab);
+  remarks::Parser Parser(remarks::Format::YAML, Buf, StrTab);
   Expected<const remarks::Remark *> Remark = Parser.getNext();
   EXPECT_FALSE(Remark); // Expect an error here.
 

From 94bad22c2c66f2178e0364c5f502f0225c1ede8e Mon Sep 17 00:00:00 2001
From: Francis Visoiu Mistrih <francisvm@yahoo.com>
Date: Tue, 16 Jul 2019 15:25:05 +0000
Subject: [PATCH 244/451] [Remarks] Simplify and refactor the RemarkParser
 interface

Before, everything was based on some kind of type-erased parser
implementation which contained a lot of boilerplate code when multiple
formats were to be supported.

This simplifies it by:

* the remark now owns its arguments
* *always* returning an error from the implementation side
* working around the way the YAML parser reports errors: catch them through
callbacks and re-insert them in a proper llvm::Error
* add a CParser wrapper that is used when implementing the C API to
avoid cluttering the C++ API with useless state
* LLVMRemarkParserGetNext now returns an object that needs to be
released to avoid leaking resources
* add a new API to dispose of a remark entry: LLVMRemarkEntryDispose

llvm-svn: 366217
---
 llvm/docs/Remarks.rst                         |   1 +
 llvm/include/llvm-c/Remarks.h                 |  23 +-
 llvm/include/llvm/IR/RemarkStreamer.h         |  12 +-
 llvm/include/llvm/Remarks/Remark.h            |  15 +-
 llvm/include/llvm/Remarks/RemarkParser.h      |  42 +-
 llvm/include/llvm/Support/SourceMgr.h         |   2 +
 llvm/lib/IR/RemarkStreamer.cpp                |  14 +-
 llvm/lib/Remarks/Remark.cpp                   |   4 +
 llvm/lib/Remarks/RemarkParser.cpp             | 143 ++----
 llvm/lib/Remarks/RemarkParserImpl.h           |  33 --
 llvm/lib/Remarks/YAMLRemarkParser.cpp         | 429 ++++++++++--------
 llvm/lib/Remarks/YAMLRemarkParser.h           | 130 ++----
 llvm/tools/llvm-opt-report/OptReport.cpp      |  26 +-
 llvm/tools/remarks-shlib/Remarks.exports      |   1 +
 .../Remarks/YAMLRemarksParsingTest.cpp        | 103 +++--
 15 files changed, 485 insertions(+), 493 deletions(-)
 delete mode 100644 llvm/lib/Remarks/RemarkParserImpl.h

diff --git a/llvm/docs/Remarks.rst b/llvm/docs/Remarks.rst
index 8215efbeebcfb..e3d088d777d29 100644
--- a/llvm/docs/Remarks.rst
+++ b/llvm/docs/Remarks.rst
@@ -295,6 +295,7 @@ The typical usage through the C API is like the following:
     LLVMRemarkEntryRef Remark = NULL;
     while ((Remark = LLVMRemarkParserGetNext(Parser))) {
        // use Remark
+       LLVMRemarkEntryDispose(Remark); // Release memory.
     }
     bool HasError = LLVMRemarkParserHasError(Parser);
     LLVMRemarkParserDispose(Parser);
diff --git a/llvm/include/llvm-c/Remarks.h b/llvm/include/llvm-c/Remarks.h
index 7fb16656a9a58..88eb5120c57c6 100644
--- a/llvm/include/llvm-c/Remarks.h
+++ b/llvm/include/llvm-c/Remarks.h
@@ -136,6 +136,13 @@ extern LLVMRemarkDebugLocRef LLVMRemarkArgGetDebugLoc(LLVMRemarkArgRef Arg);
  */
 typedef struct LLVMRemarkOpaqueEntry *LLVMRemarkEntryRef;
 
+/**
+ * Free the resources used by the remark entry.
+ *
+ * \since REMARKS_API_VERSION=0
+ */
+extern void LLVMRemarkEntryDispose(LLVMRemarkEntryRef Remark);
+
 /**
  * The type of the remark. For example, it can allow users to only keep the
  * missed optimizations from the compiler.
@@ -161,7 +168,7 @@ extern LLVMRemarkStringRef
 LLVMRemarkEntryGetRemarkName(LLVMRemarkEntryRef Remark);
 
 /**
- * Get the name of the function being processsed when the remark was emitted.
+ * Get the name of the function being processed when the remark was emitted.
  *
  * \since REMARKS_API_VERSION=0
  */
@@ -199,6 +206,8 @@ extern uint32_t LLVMRemarkEntryGetNumArgs(LLVMRemarkEntryRef Remark);
  *
  * If there are no arguments in \p Remark, the return value will be `NULL`.
  *
+ * The lifetime of the returned value is bound to the lifetime of \p Remark.
+ *
  * \since REMARKS_API_VERSION=0
  */
 extern LLVMRemarkArgRef LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark);
@@ -208,6 +217,8 @@ extern LLVMRemarkArgRef LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark);
  *
  * Returns `NULL` if there are no more arguments available.
  *
+ * The lifetime of the returned value is bound to the lifetime of \p Remark.
+ *
  * \since REMARKS_API_VERSION=0
  */
 extern LLVMRemarkArgRef LLVMRemarkEntryGetNextArg(LLVMRemarkArgRef It,
@@ -232,8 +243,11 @@ extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
 /**
  * Returns the next remark in the file.
  *
- * The value pointed to by the return value is invalidated by the next call to
- * LLVMRemarkParserGetNext().
+ * The value pointed to by the return value needs to be disposed using a call to
+ * LLVMRemarkEntryDispose().
+ *
+ * All the entries in the returned value that are of LLVMRemarkStringRef type
+ * will become invalidated once a call to LLVMRemarkParserDispose is made.
  *
  * If the parser reaches the end of the buffer, the return value will be `NULL`.
  *
@@ -258,8 +272,9 @@ extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
  * ```
  * LLVMRemarkParserRef Parser = LLVMRemarkParserCreateYAML(Buf, Size);
  * LLVMRemarkEntryRef Remark = NULL;
- * while ((Remark == LLVMRemarkParserGetNext(Parser))) {
+ * while ((Remark = LLVMRemarkParserGetNext(Parser))) {
  *    // use Remark
+ *    LLVMRemarkEntryDispose(Remark); // Release memory.
  * }
  * bool HasError = LLVMRemarkParserHasError(Parser);
  * LLVMRemarkParserDispose(Parser);
diff --git a/llvm/include/llvm/IR/RemarkStreamer.h b/llvm/include/llvm/IR/RemarkStreamer.h
index c84de9aea3519..f34cc660b2fb1 100644
--- a/llvm/include/llvm/IR/RemarkStreamer.h
+++ b/llvm/include/llvm/IR/RemarkStreamer.h
@@ -32,15 +32,9 @@ class RemarkStreamer {
   /// The object used to serialize the remarks to a specific format.
   std::unique_ptr<remarks::Serializer> Serializer;
 
-  /// Temporary buffer for converting diagnostics into remark objects. This is
-  /// used for the remark arguments that are converted from a vector of
-  /// diagnostic arguments to a vector of remark arguments.
-  SmallVector<remarks::Argument, 8> TmpArgs;
-  /// Convert diagnostics into remark objects. The result uses \p TmpArgs as a
-  /// temporary buffer for the remark arguments, and relies on all the strings
-  /// to be kept in memory until the next call to `toRemark`.
-  /// The lifetime of the members of the result is bound to the lifetime of both
-  /// the remark streamer and the LLVM diagnostics.
+  /// Convert diagnostics into remark objects.
+  /// The lifetime of the members of the result is bound to the lifetime of
+  /// the LLVM diagnostics.
   remarks::Remark toRemark(const DiagnosticInfoOptimizationBase &Diag);
 
 public:
diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h
index 4241fb1fda3b3..05d0ea60accde 100644
--- a/llvm/include/llvm/Remarks/Remark.h
+++ b/llvm/include/llvm/Remarks/Remark.h
@@ -85,10 +85,23 @@ struct Remark {
   Optional<uint64_t> Hotness;
 
   /// Arguments collected via the streaming interface.
-  ArrayRef<Argument> Args;
+  SmallVector<Argument, 5> Args;
+
+  Remark() = default;
+  Remark(Remark &&) = default;
+  Remark &operator=(Remark &&) = default;
 
   /// Return a message composed from the arguments as a string.
   std::string getArgsAsMsg() const;
+
+  /// Clone this remark to explicitly ask for a copy.
+  Remark clone() const { return *this; }
+
+private:
+  /// In order to avoid unwanted copies, "delete" the copy constructor.
+  /// If a copy is needed, it should be done through `Remark::clone()`.
+  Remark(const Remark &) = default;
+  Remark& operator=(const Remark &) = default;
 };
 
 // Create wrappers for C Binding types (see CBindingWrapping.h).
diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h
index b956f0c40250c..671e1abe5ec77 100644
--- a/llvm/include/llvm/Remarks/RemarkParser.h
+++ b/llvm/include/llvm/Remarks/RemarkParser.h
@@ -26,27 +26,33 @@ namespace remarks {
 struct ParserImpl;
 struct ParsedStringTable;
 
+class EndOfFileError : public ErrorInfo<EndOfFileError> {
+public:
+  static char ID;
+
+  EndOfFileError() {}
+
+  void log(raw_ostream &OS) const override { OS << "End of file reached."; }
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+};
+
 /// Parser used to parse a raw buffer to remarks::Remark objects.
 struct Parser {
-  /// The hidden implementation of the parser.
-  std::unique_ptr<ParserImpl> Impl;
-
-  /// Create a parser parsing \p Buffer to Remark objects.
-  /// This constructor should be only used for parsing remarks without a string
-  /// table.
-  Parser(Format ParserFormat, StringRef Buffer);
+  /// The format of the parser.
+  Format ParserFormat;
 
-  /// Create a parser parsing \p Buffer to Remark objects, using \p StrTab as a
-  /// string table.
-  Parser(Format ParserFormat, StringRef Buffer,
-         const ParsedStringTable &StrTab);
+  Parser(Format ParserFormat) : ParserFormat(ParserFormat) {}
 
-  // Needed because ParserImpl is an incomplete type.
-  ~Parser();
+  /// If no error occurs, this returns a valid Remark object.
+  /// If an error of type EndOfFileError occurs, it is safe to recover from it
+  /// by stopping the parsing.
+  /// If any other error occurs, it should be propagated to the user.
+  /// The pointer should never be null.
+  virtual Expected<std::unique_ptr<Remark>> next() = 0;
 
-  /// Returns an empty Optional if it reached the end.
-  /// Returns a valid remark otherwise.
-  Expected<const Remark *> getNext() const;
+  virtual ~Parser() = default;
 };
 
 /// In-memory representation of the string table parsed from a buffer (e.g. the
@@ -61,6 +67,10 @@ struct ParsedStringTable {
   ParsedStringTable(StringRef Buffer);
 };
 
+Expected<std::unique_ptr<Parser>>
+createRemarkParser(Format ParserFormat, StringRef Buf,
+                   Optional<const ParsedStringTable *> StrTab = None);
+
 } // end namespace remarks
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h
index 7b081d32f99e5..aa6026c23d07a 100644
--- a/llvm/include/llvm/Support/SourceMgr.h
+++ b/llvm/include/llvm/Support/SourceMgr.h
@@ -106,6 +106,8 @@ class SourceMgr {
   SourceMgr() = default;
   SourceMgr(const SourceMgr &) = delete;
   SourceMgr &operator=(const SourceMgr &) = delete;
+  SourceMgr(SourceMgr &&) = default;
+  SourceMgr &operator=(SourceMgr &&) = default;
   ~SourceMgr() = default;
 
   void setIncludeDirs(const std::vector<std::string> &Dirs) {
diff --git a/llvm/lib/IR/RemarkStreamer.cpp b/llvm/lib/IR/RemarkStreamer.cpp
index 32adef181f429..5b4c7e72b479f 100644
--- a/llvm/lib/IR/RemarkStreamer.cpp
+++ b/llvm/lib/IR/RemarkStreamer.cpp
@@ -72,9 +72,6 @@ toRemarkLocation(const DiagnosticLocation &DL) {
 /// LLVM Diagnostic -> Remark
 remarks::Remark
 RemarkStreamer::toRemark(const DiagnosticInfoOptimizationBase &Diag) {
-  // Re-use the buffer.
-  TmpArgs.clear();
-
   remarks::Remark R; // The result.
   R.RemarkType = toRemarkType(static_cast<DiagnosticKind>(Diag.getKind()));
   R.PassName = Diag.getPassName();
@@ -84,15 +81,12 @@ RemarkStreamer::toRemark(const DiagnosticInfoOptimizationBase &Diag) {
   R.Loc = toRemarkLocation(Diag.getLocation());
   R.Hotness = Diag.getHotness();
 
-  // Use TmpArgs to build the list of arguments and re-use the memory allocated
-  // from previous remark conversions.
   for (const DiagnosticInfoOptimizationBase::Argument &Arg : Diag.getArgs()) {
-    TmpArgs.emplace_back();
-    TmpArgs.back().Key = Arg.Key;
-    TmpArgs.back().Val = Arg.Val;
-    TmpArgs.back().Loc = toRemarkLocation(Arg.Loc);
+    R.Args.emplace_back();
+    R.Args.back().Key = Arg.Key;
+    R.Args.back().Val = Arg.Val;
+    R.Args.back().Loc = toRemarkLocation(Arg.Loc);
   }
-  R.Args = TmpArgs; // This is valid until the next call to this function.
 
   return R;
 }
diff --git a/llvm/lib/Remarks/Remark.cpp b/llvm/lib/Remarks/Remark.cpp
index b4be19f47a5bc..401ac514b0117 100644
--- a/llvm/lib/Remarks/Remark.cpp
+++ b/llvm/lib/Remarks/Remark.cpp
@@ -66,6 +66,10 @@ LLVMRemarkArgGetDebugLoc(LLVMRemarkArgRef Arg) {
   return nullptr;
 }
 
+extern "C" void LLVMRemarkEntryDispose(LLVMRemarkEntryRef Remark) {
+  delete unwrap(Remark);
+}
+
 extern "C" LLVMRemarkType LLVMRemarkEntryGetType(LLVMRemarkEntryRef Remark) {
   // Assume here that the enums can be converted both ways.
   return static_cast<LLVMRemarkType>(unwrap(Remark)->RemarkType);
diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index 41ed64d022b74..46130d28f72c5 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -20,69 +20,7 @@
 using namespace llvm;
 using namespace llvm::remarks;
 
-static std::unique_ptr<ParserImpl> formatToParserImpl(Format ParserFormat,
-                                                      StringRef Buf) {
-  switch (ParserFormat) {
-  case Format::YAML:
-    return llvm::make_unique<YAMLParserImpl>(Buf);
-  case Format::Unknown:
-    llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum");
-    return nullptr;
-  };
-}
-
-static std::unique_ptr<ParserImpl>
-formatToParserImpl(Format ParserFormat, StringRef Buf,
-                   const ParsedStringTable &StrTab) {
-  switch (ParserFormat) {
-  case Format::YAML:
-    return llvm::make_unique<YAMLParserImpl>(Buf, &StrTab);
-  case Format::Unknown:
-    llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum");
-    return nullptr;
-  };
-}
-
-Parser::Parser(Format ParserFormat, StringRef Buf)
-    : Impl(formatToParserImpl(ParserFormat, Buf)) {}
-
-Parser::Parser(Format ParserFormat, StringRef Buf,
-               const ParsedStringTable &StrTab)
-    : Impl(formatToParserImpl(ParserFormat, Buf, StrTab)) {}
-
-Parser::~Parser() = default;
-
-static Expected<const Remark *> getNextYAML(YAMLParserImpl &Impl) {
-  YAMLRemarkParser &YAMLParser = Impl.YAMLParser;
-  // Check for EOF.
-  if (Impl.YAMLIt == Impl.YAMLParser.Stream.end())
-    return nullptr;
-
-  auto CurrentIt = Impl.YAMLIt;
-
-  // Try to parse an entry.
-  if (Error E = YAMLParser.parseYAMLElement(*CurrentIt)) {
-    // Set the iterator to the end, in case the user calls getNext again.
-    Impl.YAMLIt = Impl.YAMLParser.Stream.end();
-    return std::move(E);
-  }
-
-  // Move on.
-  ++Impl.YAMLIt;
-
-  // Return the just-parsed remark.
-  if (const Optional<YAMLRemarkParser::ParseState> &State = YAMLParser.State)
-    return &State->TheRemark;
-  else
-    return createStringError(std::make_error_code(std::errc::invalid_argument),
-                             "unexpected error while parsing.");
-}
-
-Expected<const Remark *> Parser::getNext() const {
-  if (auto *Impl = dyn_cast<YAMLParserImpl>(this->Impl.get()))
-    return getNextYAML(*Impl);
-  llvm_unreachable("Get next called with an unknown parsing implementation.");
-}
+char EndOfFileError::ID = 0;
 
 ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) {
   while (!InBuffer.empty()) {
@@ -109,59 +47,70 @@ Expected<StringRef> ParsedStringTable::operator[](size_t Index) const {
   return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1);
 }
 
+Expected<std::unique_ptr<Parser>>
+llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf,
+                                  Optional<const ParsedStringTable *> StrTab) {
+  switch (ParserFormat) {
+  case Format::YAML:
+    return llvm::make_unique<YAMLRemarkParser>(Buf, StrTab);
+  case Format::Unknown:
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "Unknown remark parser format.");
+  }
+}
+
+// Wrapper that holds the state needed to interact with the C API.
+struct CParser {
+  std::unique_ptr<Parser> TheParser;
+  Optional<std::string> Err;
+
+  CParser(Format ParserFormat, StringRef Buf,
+          Optional<const ParsedStringTable *> StrTab = None)
+      : TheParser(cantFail(createRemarkParser(ParserFormat, Buf, StrTab))) {}
+
+  void handleError(Error E) { Err.emplace(toString(std::move(E))); }
+  bool hasError() const { return Err.hasValue(); }
+  const char *getMessage() const { return Err ? Err->c_str() : nullptr; };
+};
+
 // Create wrappers for C Binding types (see CBindingWrapping.h).
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(CParser, LLVMRemarkParserRef)
 
 extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf,
                                                           uint64_t Size) {
-  return wrap(new remarks::Parser(
-      remarks::Format::YAML, StringRef(static_cast<const char *>(Buf), Size)));
-}
-
-static void handleYAMLError(remarks::YAMLParserImpl &Impl, Error E) {
-  handleAllErrors(
-      std::move(E),
-      [&](const YAMLParseError &PE) {
-        Impl.YAMLParser.Stream.printError(&PE.getNode(),
-                                          Twine(PE.getMessage()) + Twine('\n'));
-      },
-      [&](const ErrorInfoBase &EIB) { EIB.log(Impl.YAMLParser.ErrorStream); });
-  Impl.HasErrors = true;
+  return wrap(new CParser(Format::YAML,
+                          StringRef(static_cast<const char *>(Buf), Size)));
 }
 
 extern "C" LLVMRemarkEntryRef
 LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser) {
-  remarks::Parser &TheParser = *unwrap(Parser);
-
-  Expected<const remarks::Remark *> RemarkOrErr = TheParser.getNext();
-  if (!RemarkOrErr) {
-    // Error during parsing.
-    if (auto *Impl = dyn_cast<remarks::YAMLParserImpl>(TheParser.Impl.get()))
-      handleYAMLError(*Impl, RemarkOrErr.takeError());
-    else
-      llvm_unreachable("unkown parser implementation.");
+  CParser &TheCParser = *unwrap(Parser);
+  remarks::Parser &TheParser = *TheCParser.TheParser;
+
+  Expected<std::unique_ptr<Remark>> MaybeRemark = TheParser.next();
+  if (Error E = MaybeRemark.takeError()) {
+    if (E.isA<EndOfFileError>()) {
+      consumeError(std::move(E));
+      return nullptr;
+    }
+
+    // Handle the error. Allow it to be checked through HasError and
+    // GetErrorMessage.
+    TheCParser.handleError(std::move(E));
     return nullptr;
   }
 
-  if (*RemarkOrErr == nullptr)
-    return nullptr;
   // Valid remark.
-  return wrap(*RemarkOrErr);
+  return wrap(MaybeRemark->release());
 }
 
 extern "C" LLVMBool LLVMRemarkParserHasError(LLVMRemarkParserRef Parser) {
-  if (auto *Impl =
-          dyn_cast<remarks::YAMLParserImpl>(unwrap(Parser)->Impl.get()))
-    return Impl->HasErrors;
-  llvm_unreachable("unkown parser implementation.");
+  return unwrap(Parser)->hasError();
 }
 
 extern "C" const char *
 LLVMRemarkParserGetErrorMessage(LLVMRemarkParserRef Parser) {
-  if (auto *Impl =
-          dyn_cast<remarks::YAMLParserImpl>(unwrap(Parser)->Impl.get()))
-    return Impl->YAMLParser.ErrorStream.str().c_str();
-  llvm_unreachable("unkown parser implementation.");
+  return unwrap(Parser)->getMessage();
 }
 
 extern "C" void LLVMRemarkParserDispose(LLVMRemarkParserRef Parser) {
diff --git a/llvm/lib/Remarks/RemarkParserImpl.h b/llvm/lib/Remarks/RemarkParserImpl.h
deleted file mode 100644
index 5f8c21dcdd44c..0000000000000
--- a/llvm/lib/Remarks/RemarkParserImpl.h
+++ /dev/null
@@ -1,33 +0,0 @@
-//===-- RemarkParserImpl.h - Implementation details -------------*- C++/-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides implementation details for the remark parser.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_REMARKS_REMARK_PARSER_IMPL_H
-#define LLVM_REMARKS_REMARK_PARSER_IMPL_H
-
-#include "llvm/Remarks/RemarkParser.h"
-
-namespace llvm {
-namespace remarks {
-/// This is used as a base for any parser implementation.
-struct ParserImpl {
-  explicit ParserImpl(Format ParserFormat) : ParserFormat(ParserFormat) {}
-  // Virtual destructor prevents mismatched deletes
-  virtual ~ParserImpl() {}
-
-  // The parser format. This is used as a tag to safely cast between
-  // implementations.
-  Format ParserFormat;
-};
-} // end namespace remarks
-} // end namespace llvm
-
-#endif /* LLVM_REMARKS_REMARK_PARSER_IMPL_H */
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp
index c70eef556ffd5..ed78b7ba5d951 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp
@@ -20,255 +20,308 @@ using namespace llvm::remarks;
 
 char YAMLParseError::ID = 0;
 
-Error YAMLRemarkParser::parseKey(StringRef &Result, yaml::KeyValueNode &Node) {
-  if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey())) {
-    Result = Key->getRawValue();
+static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
+  assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
+  std::string &Message = *static_cast<std::string *>(Ctx);
+  assert(Message.empty() && "Expected an empty string.");
+  raw_string_ostream OS(Message);
+  Diag.print(/*ProgName=*/nullptr, OS, /*ShowColors*/ false,
+             /*ShowKindLabels*/ true);
+  OS << '\n';
+  OS.flush();
+}
+
+YAMLParseError::YAMLParseError(StringRef Msg, SourceMgr &SM,
+                               yaml::Stream &Stream, yaml::Node &Node) {
+  // 1) Set up a diagnostic handler to avoid errors being printed out to
+  // stderr.
+  // 2) Use the stream to print the error with the associated node.
+  // 3) The stream will use the source manager to print the error, which will
+  // call the diagnostic handler.
+  // 4) The diagnostic handler will stream the error directly into this object's
+  // Message member, which is used when logging is asked for.
+  auto OldDiagHandler = SM.getDiagHandler();
+  auto OldDiagCtx = SM.getDiagContext();
+  SM.setDiagHandler(handleDiagnostic, &Message);
+  Stream.printError(&Node, Twine(Msg) + Twine('\n'));
+  // Restore the old handlers.
+  SM.setDiagHandler(OldDiagHandler, OldDiagCtx);
+}
+
+static SourceMgr setupSM(std::string &LastErrorMessage) {
+  SourceMgr SM;
+  SM.setDiagHandler(handleDiagnostic, &LastErrorMessage);
+  return SM;
+}
+
+YAMLRemarkParser::YAMLRemarkParser(StringRef Buf,
+                                   Optional<const ParsedStringTable *> StrTab)
+    : Parser{Format::YAML}, StrTab(StrTab), LastErrorMessage(),
+      SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {}
+
+Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) {
+  return make_error<YAMLParseError>(Message, SM, Stream, Node);
+}
+
+Error YAMLRemarkParser::error() {
+  if (LastErrorMessage.empty())
     return Error::success();
+  Error E = make_error<YAMLParseError>(LastErrorMessage);
+  LastErrorMessage.clear();
+  return E;
+}
+
+Expected<std::unique_ptr<Remark>>
+YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
+  if (Error E = error())
+    return std::move(E);
+
+  yaml::Node *YAMLRoot = RemarkEntry.getRoot();
+  if (!YAMLRoot) {
+    return createStringError(std::make_error_code(std::errc::invalid_argument),
+                             "not a valid YAML file.");
   }
 
-  return make_error<YAMLParseError>("key is not a string.", Node);
+  auto *Root = dyn_cast<yaml::MappingNode>(YAMLRoot);
+  if (!Root)
+    return error("document root is not of mapping type.", *YAMLRoot);
+
+  std::unique_ptr<Remark> Result = llvm::make_unique<Remark>();
+  Remark &TheRemark = *Result;
+
+  // First, the type. It needs special handling since it is not part of the
+  // key-value stream.
+  Expected<Type> T = parseType(*Root);
+  if (!T)
+    return T.takeError();
+  else
+    TheRemark.RemarkType = *T;
+
+  // Then, parse the fields, one by one.
+  for (yaml::KeyValueNode &RemarkField : *Root) {
+    Expected<StringRef> MaybeKey = parseKey(RemarkField);
+    if (!MaybeKey)
+      return MaybeKey.takeError();
+    StringRef KeyName = *MaybeKey;
+
+    if (KeyName == "Pass") {
+      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+        TheRemark.PassName = *MaybeStr;
+      else
+        return MaybeStr.takeError();
+    } else if (KeyName == "Name") {
+      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+        TheRemark.RemarkName = *MaybeStr;
+      else
+        return MaybeStr.takeError();
+    } else if (KeyName == "Function") {
+      if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
+        TheRemark.FunctionName = *MaybeStr;
+      else
+        return MaybeStr.takeError();
+    } else if (KeyName == "Hotness") {
+      if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
+        TheRemark.Hotness = *MaybeU;
+      else
+        return MaybeU.takeError();
+    } else if (KeyName == "DebugLoc") {
+      if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(RemarkField))
+        TheRemark.Loc = *MaybeLoc;
+      else
+        return MaybeLoc.takeError();
+    } else if (KeyName == "Args") {
+      auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
+      if (!Args)
+        return error("wrong value type for key.", RemarkField);
+
+      for (yaml::Node &Arg : *Args) {
+        if (Expected<Argument> MaybeArg = parseArg(Arg))
+          TheRemark.Args.push_back(*MaybeArg);
+        else
+          return MaybeArg.takeError();
+      }
+    } else {
+      return error("unknown key.", RemarkField);
+    }
+  }
+
+  // Check if any of the mandatory fields are missing.
+  if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() ||
+      TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty())
+    return error("Type, Pass, Name or Function missing.",
+                 *RemarkEntry.getRoot());
+
+  return std::move(Result);
 }
 
-template <typename T>
-Error YAMLRemarkParser::parseStr(T &Result, yaml::KeyValueNode &Node) {
+Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
+  auto Type = StringSwitch<remarks::Type>(Node.getRawTag())
+                  .Case("!Passed", remarks::Type::Passed)
+                  .Case("!Missed", remarks::Type::Missed)
+                  .Case("!Analysis", remarks::Type::Analysis)
+                  .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
+                  .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
+                  .Case("!Failure", remarks::Type::Failure)
+                  .Default(remarks::Type::Unknown);
+  if (Type == remarks::Type::Unknown)
+    return error("expected a remark tag.", Node);
+  return Type;
+}
+
+Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
+  if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
+    return Key->getRawValue();
+
+  return error("key is not a string.", Node);
+}
+
+Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
   if (!Value)
-    return make_error<YAMLParseError>("expected a value of scalar type.", Node);
-  StringRef Tmp;
+    return error("expected a value of scalar type.", Node);
+  StringRef Result;
   if (!StrTab) {
-    Tmp = Value->getRawValue();
+    Result = Value->getRawValue();
   } else {
     // If we have a string table, parse it as an unsigned.
     unsigned StrID = 0;
-    if (Error E = parseUnsigned(StrID, Node))
-      return E;
+    if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
+      StrID = *MaybeStrID;
+    else
+      return MaybeStrID.takeError();
+
     if (Expected<StringRef> Str = (**StrTab)[StrID])
-      Tmp = *Str;
+      Result = *Str;
     else
       return Str.takeError();
   }
 
-  if (Tmp.front() == '\'')
-    Tmp = Tmp.drop_front();
-
-  if (Tmp.back() == '\'')
-    Tmp = Tmp.drop_back();
+  // Strip a leading single quote; a no-op on an empty string.
+  Result.consume_front("'");
 
-  Result = Tmp;
+  // Strip a trailing single quote; a no-op on an empty string.
+  Result.consume_back("'");
 
-  return Error::success();
+  return Result;
 }
 
-template <typename T>
-Error YAMLRemarkParser::parseUnsigned(T &Result, yaml::KeyValueNode &Node) {
+Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
   SmallVector<char, 4> Tmp;
   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
   if (!Value)
-    return make_error<YAMLParseError>("expected a value of scalar type.", Node);
+    return error("expected a value of scalar type.", Node);
   unsigned UnsignedValue = 0;
   if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
-    return make_error<YAMLParseError>("expected a value of integer type.",
-                                      *Value);
-  Result = UnsignedValue;
-  return Error::success();
-}
-
-Error YAMLRemarkParser::parseType(Type &Result, yaml::MappingNode &Node) {
-  auto Type = StringSwitch<remarks::Type>(Node.getRawTag())
-                  .Case("!Passed", remarks::Type::Passed)
-                  .Case("!Missed", remarks::Type::Missed)
-                  .Case("!Analysis", remarks::Type::Analysis)
-                  .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
-                  .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
-                  .Case("!Failure", remarks::Type::Failure)
-                  .Default(remarks::Type::Unknown);
-  if (Type == remarks::Type::Unknown)
-    return make_error<YAMLParseError>("expected a remark tag.", Node);
-  Result = Type;
-  return Error::success();
+    return error("expected a value of integer type.", *Value);
+  return UnsignedValue;
 }
 
-Error YAMLRemarkParser::parseDebugLoc(Optional<RemarkLocation> &Result,
-                                      yaml::KeyValueNode &Node) {
+Expected<RemarkLocation>
+YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
   auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
   if (!DebugLoc)
-    return make_error<YAMLParseError>("expected a value of mapping type.",
-                                      Node);
+    return error("expected a value of mapping type.", Node);
 
   Optional<StringRef> File;
   Optional<unsigned> Line;
   Optional<unsigned> Column;
 
   for (yaml::KeyValueNode &DLNode : *DebugLoc) {
-    StringRef KeyName;
-    if (Error E = parseKey(KeyName, DLNode))
-      return E;
+    Expected<StringRef> MaybeKey = parseKey(DLNode);
+    if (!MaybeKey)
+      return MaybeKey.takeError();
+    StringRef KeyName = *MaybeKey;
+
     if (KeyName == "File") {
-      if (Error E = parseStr(File, DLNode))
-        return E;
+      if (Expected<StringRef> MaybeStr = parseStr(DLNode))
+        File = *MaybeStr;
+      else
+        return MaybeStr.takeError();
     } else if (KeyName == "Column") {
-      if (Error E = parseUnsigned(Column, DLNode))
-        return E;
+      if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
+        Column = *MaybeU;
+      else
+        return MaybeU.takeError();
     } else if (KeyName == "Line") {
-      if (Error E = parseUnsigned(Line, DLNode))
-        return E;
+      if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
+        Line = *MaybeU;
+      else
+        return MaybeU.takeError();
     } else {
-      return make_error<YAMLParseError>("unknown entry in DebugLoc map.",
-                                        DLNode);
+      return error("unknown entry in DebugLoc map.", DLNode);
     }
   }
 
   // If any of the debug loc fields is missing, return an error.
   if (!File || !Line || !Column)
-    return make_error<YAMLParseError>("DebugLoc node incomplete.", Node);
-
-  Result = RemarkLocation{*File, *Line, *Column};
-
-  return Error::success();
-}
-
-Error YAMLRemarkParser::parseRemarkField(yaml::KeyValueNode &RemarkField) {
-
-  StringRef KeyName;
-  if (Error E = parseKey(KeyName, RemarkField))
-    return E;
-
-  if (KeyName == "Pass") {
-    if (Error E = parseStr(State->TheRemark.PassName, RemarkField))
-      return E;
-  } else if (KeyName == "Name") {
-    if (Error E = parseStr(State->TheRemark.RemarkName, RemarkField))
-      return E;
-  } else if (KeyName == "Function") {
-    if (Error E = parseStr(State->TheRemark.FunctionName, RemarkField))
-      return E;
-  } else if (KeyName == "Hotness") {
-    State->TheRemark.Hotness = 0;
-    if (Error E = parseUnsigned(*State->TheRemark.Hotness, RemarkField))
-      return E;
-  } else if (KeyName == "DebugLoc") {
-    if (Error E = parseDebugLoc(State->TheRemark.Loc, RemarkField))
-      return E;
-  } else if (KeyName == "Args") {
-    auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
-    if (!Args)
-      return make_error<YAMLParseError>("wrong value type for key.",
-                                        RemarkField);
-
-    for (yaml::Node &Arg : *Args)
-      if (Error E = parseArg(State->Args, Arg))
-        return E;
-
-    State->TheRemark.Args = State->Args;
-  } else {
-    return make_error<YAMLParseError>("unknown key.", RemarkField);
-  }
+    return error("DebugLoc node incomplete.", Node);
 
-  return Error::success();
+  return RemarkLocation{*File, *Line, *Column};
 }
 
-Error YAMLRemarkParser::parseArg(SmallVectorImpl<Argument> &Args,
-                                 yaml::Node &Node) {
+Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
   auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
   if (!ArgMap)
-    return make_error<YAMLParseError>("expected a value of mapping type.",
-                                      Node);
+    return error("expected a value of mapping type.", Node);
 
-  StringRef KeyStr;
-  StringRef ValueStr;
+  Optional<StringRef> KeyStr;
+  Optional<StringRef> ValueStr;
   Optional<RemarkLocation> Loc;
 
-  for (yaml::KeyValueNode &ArgEntry : *ArgMap)
-    if (Error E = parseArgEntry(ArgEntry, KeyStr, ValueStr, Loc))
-      return E;
-
-  if (KeyStr.empty())
-    return make_error<YAMLParseError>("argument key is missing.", *ArgMap);
-  if (ValueStr.empty())
-    return make_error<YAMLParseError>("argument value is missing.", *ArgMap);
+  for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
+    Expected<StringRef> MaybeKey = parseKey(ArgEntry);
+    if (!MaybeKey)
+      return MaybeKey.takeError();
+    StringRef KeyName = *MaybeKey;
+
+    // Try to parse debug locs.
+    if (KeyName == "DebugLoc") {
+      // Can't have multiple DebugLoc entries per argument.
+      if (Loc)
+        return error("only one DebugLoc entry is allowed per argument.",
+                     ArgEntry);
+
+      if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(ArgEntry)) {
+        Loc = *MaybeLoc;
+        continue;
+      } else
+        return MaybeLoc.takeError();
+    }
 
-  Args.push_back(Argument{KeyStr, ValueStr, Loc});
+    // If we already have a string, error out.
+    if (ValueStr)
+      return error("only one string entry is allowed per argument.", ArgEntry);
 
-  return Error::success();
-}
+    // Try to parse the value.
+    if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
+      ValueStr = *MaybeStr;
+    else
+      return MaybeStr.takeError();
 
-Error YAMLRemarkParser::parseArgEntry(yaml::KeyValueNode &ArgEntry,
-                                      StringRef &KeyStr, StringRef &ValueStr,
-                                      Optional<RemarkLocation> &Loc) {
-  StringRef KeyName;
-  if (Error E = parseKey(KeyName, ArgEntry))
-    return E;
-
-  // Try to parse debug locs.
-  if (KeyName == "DebugLoc") {
-    // Can't have multiple DebugLoc entries per argument.
-    if (Loc)
-      return make_error<YAMLParseError>(
-          "only one DebugLoc entry is allowed per argument.", ArgEntry);
-
-    if (Error E = parseDebugLoc(Loc, ArgEntry))
-      return E;
-    return Error::success();
+    // Keep the key from the string.
+    KeyStr = KeyName;
   }
 
-  // If we already have a string, error out.
-  if (!ValueStr.empty())
-    return make_error<YAMLParseError>(
-        "only one string entry is allowed per argument.", ArgEntry);
+  if (!KeyStr)
+    return error("argument key is missing.", *ArgMap);
+  if (!ValueStr)
+    return error("argument value is missing.", *ArgMap);
 
-  // Try to parse a string.
-  if (Error E = parseStr(ValueStr, ArgEntry))
-    return E;
-
-  // Keep the key from the string.
-  KeyStr = KeyName;
-  return Error::success();
+  return Argument{*KeyStr, *ValueStr, Loc};
 }
 
-Error YAMLRemarkParser::parseYAMLElement(yaml::Document &Remark) {
-  // Parsing a new remark, clear the previous one by re-constructing the state
-  // in-place in the Optional.
-  State.emplace(TmpArgs);
+Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
+  if (YAMLIt == Stream.end())
+    return make_error<EndOfFileError>();
 
-  yaml::Node *YAMLRoot = Remark.getRoot();
-  if (!YAMLRoot)
-    return createStringError(std::make_error_code(std::errc::invalid_argument),
-                             "not a valid YAML file.");
-
-  auto *Root = dyn_cast<yaml::MappingNode>(YAMLRoot);
-  if (!Root)
-    return make_error<YAMLParseError>("document root is not of mapping type.",
-                                      *YAMLRoot);
-
-  if (Error E = parseType(State->TheRemark.RemarkType, *Root))
-    return E;
-
-  for (yaml::KeyValueNode &RemarkField : *Root)
-    if (Error E = parseRemarkField(RemarkField))
-      return E;
+  Expected<std::unique_ptr<Remark>> MaybeResult = parseRemark(*YAMLIt);
+  if (!MaybeResult) {
+    // Avoid garbage input, set the iterator to the end.
+    YAMLIt = Stream.end();
+    return MaybeResult.takeError();
+  }
 
-  // If the YAML parsing failed, don't even continue parsing. We might
-  // encounter malformed YAML.
-  if (Stream.failed())
-    return make_error<YAMLParseError>("YAML parsing failed.",
-                                      *Remark.getRoot());
+  ++YAMLIt;
 
-  // Check if any of the mandatory fields are missing.
-  if (State->TheRemark.RemarkType == Type::Unknown ||
-      State->TheRemark.PassName.empty() ||
-      State->TheRemark.RemarkName.empty() ||
-      State->TheRemark.FunctionName.empty())
-    return make_error<YAMLParseError>("Type, Pass, Name or Function missing.",
-                                      *Remark.getRoot());
-
-  return Error::success();
-}
-
-/// Handle a diagnostic from the YAML stream. Records the error in the
-/// YAMLRemarkParser class.
-void YAMLRemarkParser::HandleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
-  assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
-  auto *Parser = static_cast<YAMLRemarkParser *>(Ctx);
-  Diag.print(/*ProgName=*/nullptr, Parser->ErrorStream, /*ShowColors*/ false,
-             /*ShowKindLabels*/ true);
+  return std::move(*MaybeResult);
 }
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h
index 14698bbd3ca48..cea76e63e75c1 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.h
+++ b/llvm/lib/Remarks/YAMLRemarkParser.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_REMARKS_YAML_REMARK_PARSER_H
 #define LLVM_REMARKS_YAML_REMARK_PARSER_H
 
-#include "RemarkParserImpl.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Remarks/Remark.h"
@@ -27,112 +26,69 @@
 
 namespace llvm {
 namespace remarks {
-/// Parses and holds the state of the latest parsed remark.
-struct YAMLRemarkParser {
-  /// Source manager for better error messages.
-  SourceMgr SM;
-  /// Stream for yaml parsing.
-  yaml::Stream Stream;
-  /// Storage for the error stream.
-  std::string ErrorString;
-  /// The error stream.
-  raw_string_ostream ErrorStream;
-  /// Temporary parsing buffer for the arguments.
-  SmallVector<Argument, 8> TmpArgs;
-  /// The string table used for parsing strings.
-  Optional<const ParsedStringTable *> StrTab;
-  /// The state used by the parser to parse a remark entry. Invalidated with
-  /// every call to `parseYAMLElement`.
-  struct ParseState {
-    /// Temporary parsing buffer for the arguments.
-    /// The parser itself is owning this buffer in order to reduce the number of
-    /// allocations.
-    SmallVectorImpl<Argument> &Args;
-    Remark TheRemark;
-
-    ParseState(SmallVectorImpl<Argument> &Args) : Args(Args) {}
-    /// Use Args only as a **temporary** buffer.
-    ~ParseState() { Args.clear(); }
-  };
-
-  /// The current state of the parser. If the parsing didn't start yet, it will
-  /// not be containing any value.
-  Optional<ParseState> State;
-
-  YAMLRemarkParser(StringRef Buf,
-                   Optional<const ParsedStringTable *> StrTab = None)
-      : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString),
-        TmpArgs(), StrTab(StrTab) {
-    SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this);
-  }
-
-  /// Parse a YAML element.
-  Error parseYAMLElement(yaml::Document &Remark);
-
-private:
-  /// Parse one key to a string.
-  /// otherwise.
-  Error parseKey(StringRef &Result, yaml::KeyValueNode &Node);
-  /// Parse one value to a string.
-  template <typename T> Error parseStr(T &Result, yaml::KeyValueNode &Node);
-  /// Parse one value to an unsigned.
-  template <typename T>
-  Error parseUnsigned(T &Result, yaml::KeyValueNode &Node);
-  /// Parse the type of a remark to an enum type.
-  Error parseType(Type &Result, yaml::MappingNode &Node);
-  /// Parse a debug location.
-  Error parseDebugLoc(Optional<RemarkLocation> &Result,
-                      yaml::KeyValueNode &Node);
-  /// Parse a remark field and update the parsing state.
-  Error parseRemarkField(yaml::KeyValueNode &RemarkField);
-  /// Parse an argument.
-  Error parseArg(SmallVectorImpl<Argument> &TmpArgs, yaml::Node &Node);
-  /// Parse an entry from the contents of an argument.
-  Error parseArgEntry(yaml::KeyValueNode &ArgEntry, StringRef &KeyStr,
-                      StringRef &ValueStr, Optional<RemarkLocation> &Loc);
-
-  /// Handle a diagnostic from the YAML stream. Records the error in the
-  /// YAMLRemarkParser class.
-  static void HandleDiagnostic(const SMDiagnostic &Diag, void *Ctx);
-};
 
 class YAMLParseError : public ErrorInfo<YAMLParseError> {
 public:
   static char ID;
 
-  YAMLParseError(StringRef Message, yaml::Node &Node)
-      : Message(Message), Node(Node) {}
+  YAMLParseError(StringRef Message, SourceMgr &SM, yaml::Stream &Stream,
+                 yaml::Node &Node);
+
+  YAMLParseError(StringRef Message) : Message(Message) {}
 
   void log(raw_ostream &OS) const override { OS << Message; }
   std::error_code convertToErrorCode() const override {
     return inconvertibleErrorCode();
   }
 
-  StringRef getMessage() const { return Message; }
-  yaml::Node &getNode() const { return Node; }
-
 private:
-  StringRef Message; // No need to hold a full copy of the buffer.
-  yaml::Node &Node;
+  std::string Message;
 };
 
 /// Regular YAML to Remark parser.
-struct YAMLParserImpl : public ParserImpl {
-  /// The object parsing the YAML.
-  YAMLRemarkParser YAMLParser;
+struct YAMLRemarkParser : public Parser {
+  /// The string table used for parsing strings.
+  Optional<const ParsedStringTable *> StrTab;
+  /// Last error message that can come from the YAML parser diagnostics.
+  /// We need this for catching errors in the constructor.
+  std::string LastErrorMessage;
+  /// Source manager for better error messages.
+  SourceMgr SM;
+  /// Stream for yaml parsing.
+  yaml::Stream Stream;
   /// Iterator in the YAML stream.
   yaml::document_iterator YAMLIt;
-  /// Set to `true` if we had any errors during parsing.
-  bool HasErrors = false;
 
-  YAMLParserImpl(StringRef Buf,
-                 Optional<const ParsedStringTable *> StrTab = None)
-      : ParserImpl{Format::YAML}, YAMLParser(Buf, StrTab),
-        YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {}
+  YAMLRemarkParser(StringRef Buf,
+                   Optional<const ParsedStringTable *> StrTab = None);
+
+  Expected<std::unique_ptr<Remark>> next() override;
 
-  static bool classof(const ParserImpl *PI) {
-    return PI->ParserFormat == Format::YAML;
+  static bool classof(const Parser *P) {
+    return P->ParserFormat == Format::YAML;
   }
+
+private:
+  /// Create a YAMLParseError error from an existing error generated by the YAML
+  /// parser.
+  /// If there is no error, this returns Success.
+  Error error();
+  /// Create a YAMLParseError error referencing a specific node.
+  Error error(StringRef Message, yaml::Node &Node);
+  /// Parse a YAML remark to a remarks::Remark object.
+  Expected<std::unique_ptr<Remark>> parseRemark(yaml::Document &Remark);
+  /// Parse the type of a remark to an enum type.
+  Expected<Type> parseType(yaml::MappingNode &Node);
+  /// Parse one key to a string.
+  Expected<StringRef> parseKey(yaml::KeyValueNode &Node);
+  /// Parse one value to a string.
+  Expected<StringRef> parseStr(yaml::KeyValueNode &Node);
+  /// Parse one value to an unsigned.
+  Expected<unsigned> parseUnsigned(yaml::KeyValueNode &Node);
+  /// Parse a debug location.
+  Expected<RemarkLocation> parseDebugLoc(yaml::KeyValueNode &Node);
+  /// Parse an argument.
+  Expected<Argument> parseArg(yaml::Node &Node);
 };
 } // end namespace remarks
 } // end namespace llvm
diff --git a/llvm/tools/llvm-opt-report/OptReport.cpp b/llvm/tools/llvm-opt-report/OptReport.cpp
index 80d0b73664d01..5662c9fbd7b5e 100644
--- a/llvm/tools/llvm-opt-report/OptReport.cpp
+++ b/llvm/tools/llvm-opt-report/OptReport.cpp
@@ -150,20 +150,32 @@ static bool readLocationInfo(LocationInfoTy &LocationInfo) {
     return false;
   }
 
-  remarks::Parser Parser(remarks::Format::YAML, (*Buf)->getBuffer());
+  Expected<std::unique_ptr<remarks::Parser>> MaybeParser =
+      remarks::createRemarkParser(remarks::Format::YAML, (*Buf)->getBuffer());
+  if (!MaybeParser) {
+    handleAllErrors(MaybeParser.takeError(), [&](const ErrorInfoBase &PE) {
+      PE.log(WithColor::error());
+    });
+    return false;
+  }
+  remarks::Parser &Parser = **MaybeParser;
 
   while (true) {
-    Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
-    if (!RemarkOrErr) {
-      handleAllErrors(RemarkOrErr.takeError(), [&](const ErrorInfoBase &PE) {
+    Expected<std::unique_ptr<remarks::Remark>> MaybeRemark = Parser.next();
+    if (!MaybeRemark) {
+      Error E = MaybeRemark.takeError();
+      if (E.isA<remarks::EndOfFileError>()) {
+        // EOF is reported as an error; consume it and stop reading.
+        consumeError(std::move(E));
+        break;
+      }
+      handleAllErrors(std::move(E), [&](const ErrorInfoBase &PE) {
         PE.log(WithColor::error());
       });
       return false;
     }
-    if (!*RemarkOrErr) // End of file.
-      break;
 
-    const remarks::Remark &Remark = **RemarkOrErr;
+    const remarks::Remark &Remark = **MaybeRemark;
 
     bool Transformed = Remark.RemarkType == remarks::Type::Passed;
 
diff --git a/llvm/tools/remarks-shlib/Remarks.exports b/llvm/tools/remarks-shlib/Remarks.exports
index 7260f9a543d5d..9ec1e73a471e0 100644
--- a/llvm/tools/remarks-shlib/Remarks.exports
+++ b/llvm/tools/remarks-shlib/Remarks.exports
@@ -6,6 +6,7 @@ LLVMRemarkDebugLocGetSourceColumn
 LLVMRemarkArgGetKey
 LLVMRemarkArgGetValue
 LLVMRemarkArgGetDebugLoc
+LLVMRemarkEntryDispose
 LLVMRemarkEntryGetType
 LLVMRemarkEntryGetPassName
 LLVMRemarkEntryGetRemarkName
diff --git a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp
index e3c7cdf881e3c..8b79dfd814f05 100644
--- a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp
+++ b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp
@@ -14,20 +14,31 @@
 using namespace llvm;
 
 template <size_t N> void parseGood(const char (&Buf)[N]) {
-  remarks::Parser Parser(remarks::Format::YAML, {Buf, N - 1});
-  Expected<const remarks::Remark *> Remark = Parser.getNext();
+  Expected<std::unique_ptr<remarks::Parser>> MaybeParser =
+      remarks::createRemarkParser(remarks::Format::YAML, {Buf, N - 1});
+  EXPECT_FALSE(errorToBool(MaybeParser.takeError()));
+  EXPECT_TRUE(*MaybeParser != nullptr);
+
+  remarks::Parser &Parser = **MaybeParser;
+  Expected<std::unique_ptr<remarks::Remark>> Remark = Parser.next();
   EXPECT_FALSE(errorToBool(Remark.takeError())); // Check for parsing errors.
   EXPECT_TRUE(*Remark != nullptr);               // At least one remark.
-  Remark = Parser.getNext();
-  EXPECT_FALSE(errorToBool(Remark.takeError())); // Check for parsing errors.
-  EXPECT_TRUE(*Remark == nullptr); // Check that there are no more remarks.
+  Remark = Parser.next();
+  Error E = Remark.takeError();
+  EXPECT_TRUE(E.isA<remarks::EndOfFileError>());
+  EXPECT_TRUE(errorToBool(std::move(E))); // Check for parsing errors.
 }
 
 template <size_t N>
 bool parseExpectError(const char (&Buf)[N], const char *Error) {
-  remarks::Parser Parser(remarks::Format::YAML, {Buf, N - 1});
-  Expected<const remarks::Remark *> Remark = Parser.getNext();
-  EXPECT_FALSE(Remark); // Expect an error here.
+  Expected<std::unique_ptr<remarks::Parser>> MaybeParser =
+      remarks::createRemarkParser(remarks::Format::YAML, {Buf, N - 1});
+  EXPECT_FALSE(errorToBool(MaybeParser.takeError()));
+  EXPECT_TRUE(*MaybeParser != nullptr);
+
+  remarks::Parser &Parser = **MaybeParser;
+  Expected<std::unique_ptr<remarks::Remark>> Remark = Parser.next();
+  EXPECT_FALSE(Remark); // Check for parsing errors.
 
   std::string ErrorStr;
   raw_string_ostream Stream(ErrorStr);
@@ -42,7 +53,7 @@ TEST(YAMLRemarks, ParsingEmpty) {
 
 TEST(YAMLRemarks, ParsingNotYAML) {
   EXPECT_TRUE(
-      parseExpectError("\x01\x02\x03\x04\x05\x06", "not a valid YAML file."));
+      parseExpectError("\x01\x02\x03\x04\x05\x06", "Got empty plain scalar"));
 }
 
 TEST(YAMLRemarks, ParsingGood) {
@@ -309,17 +320,6 @@ TEST(YAMLRemarks, ParsingWrongArgs) {
                    "",
                    "only one string entry is allowed per argument."));
   // No arg value.
-  EXPECT_TRUE(parseExpectError("\n"
-                   "--- !Missed\n"
-                   "Pass: inline\n"
-                   "Name: NoDefinition\n"
-                   "Function: foo\n"
-                   "Args:\n"
-                   "  - Callee: ''\n"
-                   "  - DebugLoc: { File: a, Line: 1, Column: 2 }\n"
-                   "",
-                   "argument value is missing."));
-  // No arg value.
   EXPECT_TRUE(parseExpectError("\n"
                    "--- !Missed\n"
                    "Pass: inline\n"
@@ -354,12 +354,18 @@ TEST(YAMLRemarks, Contents) {
                   "  - String: ' because its definition is unavailable'\n"
                   "\n";
 
-  remarks::Parser Parser(remarks::Format::YAML, Buf);
-  Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
-  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
-  EXPECT_TRUE(*RemarkOrErr != nullptr);
+  Expected<std::unique_ptr<remarks::Parser>> MaybeParser =
+      remarks::createRemarkParser(remarks::Format::YAML, Buf);
+  EXPECT_FALSE(errorToBool(MaybeParser.takeError()));
+  EXPECT_TRUE(*MaybeParser != nullptr);
+
+  remarks::Parser &Parser = **MaybeParser;
+  Expected<std::unique_ptr<remarks::Remark>> MaybeRemark = Parser.next();
+  EXPECT_FALSE(
+      errorToBool(MaybeRemark.takeError())); // Check for parsing errors.
+  EXPECT_TRUE(*MaybeRemark != nullptr);      // At least one remark.
 
-  const remarks::Remark &Remark = **RemarkOrErr;
+  const remarks::Remark &Remark = **MaybeRemark;
   EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed);
   EXPECT_EQ(checkStr(Remark.PassName, 6), "inline");
   EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition");
@@ -408,9 +414,10 @@ TEST(YAMLRemarks, Contents) {
     ++ArgID;
   }
 
-  RemarkOrErr = Parser.getNext();
-  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
-  EXPECT_EQ(*RemarkOrErr, nullptr);
+  MaybeRemark = Parser.next();
+  Error E = MaybeRemark.takeError();
+  EXPECT_TRUE(E.isA<remarks::EndOfFileError>());
+  EXPECT_TRUE(errorToBool(std::move(E))); // Check for parsing errors.
 }
 
 static inline StringRef checkStr(LLVMRemarkStringRef Str,
@@ -487,6 +494,8 @@ TEST(YAMLRemarks, ContentsCAPI) {
     ++ArgID;
   } while ((Arg = LLVMRemarkEntryGetNextArg(Arg, Remark)));
 
+  LLVMRemarkEntryDispose(Remark);
+
   EXPECT_EQ(LLVMRemarkParserGetNext(Parser), nullptr);
 
   EXPECT_FALSE(LLVMRemarkParserHasError(Parser));
@@ -516,12 +525,18 @@ TEST(YAMLRemarks, ContentsStrTab) {
                 115);
 
   remarks::ParsedStringTable StrTab(StrTabBuf);
-  remarks::Parser Parser(remarks::Format::YAML, Buf, StrTab);
-  Expected<const remarks::Remark *> RemarkOrErr = Parser.getNext();
-  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
-  EXPECT_TRUE(*RemarkOrErr != nullptr);
-
-  const remarks::Remark &Remark = **RemarkOrErr;
+  Expected<std::unique_ptr<remarks::Parser>> MaybeParser =
+      remarks::createRemarkParser(remarks::Format::YAML, Buf, &StrTab);
+  EXPECT_FALSE(errorToBool(MaybeParser.takeError()));
+  EXPECT_TRUE(*MaybeParser != nullptr);
+
+  remarks::Parser &Parser = **MaybeParser;
+  Expected<std::unique_ptr<remarks::Remark>> MaybeRemark = Parser.next();
+  EXPECT_FALSE(
+      errorToBool(MaybeRemark.takeError())); // Check for parsing errors.
+  EXPECT_TRUE(*MaybeRemark != nullptr);      // At least one remark.
+
+  const remarks::Remark &Remark = **MaybeRemark;
   EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed);
   EXPECT_EQ(checkStr(Remark.PassName, 6), "inline");
   EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition");
@@ -570,9 +585,10 @@ TEST(YAMLRemarks, ContentsStrTab) {
     ++ArgID;
   }
 
-  RemarkOrErr = Parser.getNext();
-  EXPECT_FALSE(errorToBool(RemarkOrErr.takeError()));
-  EXPECT_EQ(*RemarkOrErr, nullptr);
+  MaybeRemark = Parser.next();
+  Error E = MaybeRemark.takeError();
+  EXPECT_TRUE(E.isA<remarks::EndOfFileError>());
+  EXPECT_TRUE(errorToBool(std::move(E))); // Check for parsing errors.
 }
 
 TEST(YAMLRemarks, ParsingBadStringTableIndex) {
@@ -584,13 +600,18 @@ TEST(YAMLRemarks, ParsingBadStringTableIndex) {
   StringRef StrTabBuf = StringRef("inline");
 
   remarks::ParsedStringTable StrTab(StrTabBuf);
-  remarks::Parser Parser(remarks::Format::YAML, Buf, StrTab);
-  Expected<const remarks::Remark *> Remark = Parser.getNext();
-  EXPECT_FALSE(Remark); // Expect an error here.
+  Expected<std::unique_ptr<remarks::Parser>> MaybeParser =
+      remarks::createRemarkParser(remarks::Format::YAML, Buf, &StrTab);
+  EXPECT_FALSE(errorToBool(MaybeParser.takeError()));
+  EXPECT_TRUE(*MaybeParser != nullptr);
+
+  remarks::Parser &Parser = **MaybeParser;
+  Expected<std::unique_ptr<remarks::Remark>> MaybeRemark = Parser.next();
+  EXPECT_FALSE(MaybeRemark); // Expect an error here.
 
   std::string ErrorStr;
   raw_string_ostream Stream(ErrorStr);
-  handleAllErrors(Remark.takeError(),
+  handleAllErrors(MaybeRemark.takeError(),
                   [&](const ErrorInfoBase &EIB) { EIB.log(Stream); });
   EXPECT_TRUE(
       StringRef(Stream.str())

From 88ed076e44756f5418be17596cec943eea864f30 Mon Sep 17 00:00:00 2001
From: Amara Emerson <aemerson@apple.com>
Date: Tue, 16 Jul 2019 15:28:29 +0000
Subject: [PATCH 245/451] Add missing test for r366215

llvm-svn: 366218
---
 .../blocks-with-dead-term-nondeterministic.ll | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 llvm/test/Transforms/ADCE/blocks-with-dead-term-nondeterministic.ll

diff --git a/llvm/test/Transforms/ADCE/blocks-with-dead-term-nondeterministic.ll b/llvm/test/Transforms/ADCE/blocks-with-dead-term-nondeterministic.ll
new file mode 100644
index 0000000000000..dd8a70e4993c0
--- /dev/null
+++ b/llvm/test/Transforms/ADCE/blocks-with-dead-term-nondeterministic.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -adce --preserve-ll-uselistorder -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; CHECK: uselistorder label %bb16, { 1, 0 }
+; Function Attrs: noinline nounwind ssp uwtable
+define void @ham() local_unnamed_addr #0 {
+bb:
+  br i1 false, label %bb1, label %bb22
+
+bb1:                                              ; preds = %bb
+  br i1 undef, label %bb2, label %bb20
+
+bb2:                                              ; preds = %bb1
+  br label %bb5
+
+bb5:                                              ; preds = %bb16, %bb2
+  br i1 undef, label %bb6, label %bb17
+
+bb6:                                              ; preds = %bb5
+  br i1 undef, label %bb7, label %bb16
+
+bb7:                                              ; preds = %bb6
+  br i1 undef, label %bb9, label %bb8
+
+bb8:                                              ; preds = %bb7
+  br i1 undef, label %bb9, label %bb10
+
+bb9:                                              ; preds = %bb8, %bb7
+  br label %bb13
+
+bb10:                                             ; preds = %bb8
+  br label %bb12
+
+bb12:                                             ; preds = %bb10
+  br label %bb13
+
+bb13:                                             ; preds = %bb12, %bb9
+  br label %bb14
+
+bb14:                                             ; preds = %bb13
+  br label %bb15
+
+bb15:                                             ; preds = %bb14
+  br label %bb16
+
+bb16:                                             ; preds = %bb15, %bb6
+  br label %bb5
+
+bb17:                                             ; preds = %bb5
+  br label %bb19
+
+bb19:                                             ; preds = %bb17
+  br label %bb21
+
+bb20:                                             ; preds = %bb1
+  br label %bb21
+
+bb21:                                             ; preds = %bb20, %bb19
+  br label %bb22
+
+bb22:                                             ; preds = %bb21, %bb
+  ret void
+}
+
+attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 7, !"PIC Level", i32 2}

From 2eacf698802b5c8bc3c9e0031ac11cd0d79f9eae Mon Sep 17 00:00:00 2001
From: Alex Brachet <alexbrachetmialot@gmail.com>
Date: Tue, 16 Jul 2019 15:33:43 +0000
Subject: [PATCH 246/451] Revert [tools] [llvm-nm] Default to reading from
 stdin not a.out

This reverts r365889 (git commit 60c81354b1d3fced1bd284d334f118d2d792ab4b)

llvm-svn: 366219
---
 llvm/docs/CommandGuide/llvm-nm.rst |  5 +++--
 llvm/test/tools/llvm-nm/stdin.test | 33 ------------------------------
 llvm/tools/llvm-nm/llvm-nm.cpp     |  9 +-------
 3 files changed, 4 insertions(+), 43 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-nm/stdin.test

diff --git a/llvm/docs/CommandGuide/llvm-nm.rst b/llvm/docs/CommandGuide/llvm-nm.rst
index aa58a3f3884ca..f071e1be1a5c9 100644
--- a/llvm/docs/CommandGuide/llvm-nm.rst
+++ b/llvm/docs/CommandGuide/llvm-nm.rst
@@ -13,8 +13,9 @@ DESCRIPTION
 
 The :program:`llvm-nm` utility lists the names of symbols from LLVM bitcode
 files, object files, and archives. Each symbol is listed along with some simple
-information about its provenance. If no filename is specified, or *-* is used as
-a filename, :program:`llvm-nm` will read a file from its standard input stream.
+information about its provenance. If no filename is specified, *a.out* is used
+as the input. If *-* is used as a filename, :program:`llvm-nm` will read a file
+from its standard input stream.
 
 :program:`llvm-nm`'s default output format is the traditional BSD :program:`nm`
 output format. Each such output record consists of an (optional) 8-digit
diff --git a/llvm/test/tools/llvm-nm/stdin.test b/llvm/test/tools/llvm-nm/stdin.test
deleted file mode 100644
index 352ee23a30ab9..0000000000000
--- a/llvm/test/tools/llvm-nm/stdin.test
+++ /dev/null
@@ -1,33 +0,0 @@
-## Test llvm-nm when using stdin both explicitly (using '-' as a filename)
-## and implicitly (not specifying any filename).
-
-# RUN: yaml2obj %s -o %t.o
-
-## Pass an explicit filename to produce a baseline output. llvm-nm should
-## have the same behavior when opening a file itself and when reading that
-## file from its standard input stream.
-# RUN: llvm-nm %t.o > %t.base 2> %t.err
-
-## Make sure there is no warning message about no file redirected to stdin.
-# RUN: FileCheck %s --input-file=%t.err --allow-empty --implicit-check-not={{.}}
-
-# RUN: llvm-nm - < %t.o > %t.explicit 2> %t.err
-# RUN: FileCheck %s --input-file=%t.err --allow-empty --implicit-check-not={{.}}
-# RUN: cmp %t.base %t.explicit
-
-# RUN: llvm-nm < %t.o > %t.implicit 2> %t.err
-# RUN: FileCheck %s --input-file=%t.err --allow-empty --implicit-check-not={{.}}
-# RUN: cmp %t.base %t.implicit
-
-!ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol_a
-    Section: .text
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index c45c8716f18dc..aa62e6f0209b4 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -34,7 +34,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TargetSelect.h"
@@ -1752,12 +1751,6 @@ static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) {
 }
 
 static void dumpSymbolNamesFromFile(std::string &Filename) {
-  if (Filename == "-" && sys::Process::StandardInIsUserInput()) {
-    WithColor::warning(errs(), ToolName) << "can't read from terminal\n";
-    cl::PrintHelpMessage();
-    HadError = true;
-    return;
-  }
   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
       MemoryBuffer::getFileOrSTDIN(Filename);
   if (error(BufferOrErr.getError(), Filename))
@@ -2089,7 +2082,7 @@ int main(int argc, char **argv) {
   if (OutputFormat == sysv || SizeSort)
     PrintSize = true;
   if (InputFilenames.empty())
-    InputFilenames.push_back("-");
+    InputFilenames.push_back("a.out");
   if (InputFilenames.size() > 1)
     MultipleFiles = true;
 

From 42816107f78e6ce345e68960ff2183d1cabfe815 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Tue, 16 Jul 2019 15:51:32 +0000
Subject: [PATCH 247/451] [OPENMP]Fix threadid in __kmpc_omp_taskwait call for
 dependent target calls.

Summary:
We used to call the __kmpc_omp_taskwait function with the global thread id set
to 0. This may crash the application at runtime if the thread executing the
target region is not the master thread.

Reviewers: grokos, kkwli0

Subscribers: guansong, jdoerfert, caomhin, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D64571

llvm-svn: 366220
---
 openmp/libomptarget/src/interface.cpp         | 10 +--
 .../test/offloading/target_depend_nowait.cpp  | 62 +++++++++++++++++++
 2 files changed, 67 insertions(+), 5 deletions(-)
 create mode 100644 openmp/libomptarget/test/offloading/target_depend_nowait.cpp

diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 52850ee39782d..32afe3fcb4b70 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -128,7 +128,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
     int32_t depNum, void *depList, int32_t noAliasDepNum,
     void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, 0);
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
                           arg_types);
@@ -181,7 +181,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
     int32_t depNum, void *depList, int32_t noAliasDepNum,
     void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, 0);
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
                         arg_types);
@@ -214,7 +214,7 @@ EXTERN void __tgt_target_data_update_nowait(
     int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
     int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, 0);
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes,
                            arg_types);
@@ -255,7 +255,7 @@ EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
     int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
     void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, 0);
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
                       arg_types);
@@ -298,7 +298,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
     int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, 0);
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args,
                             arg_sizes, arg_types, team_num, thread_limit);
diff --git a/openmp/libomptarget/test/offloading/target_depend_nowait.cpp b/openmp/libomptarget/test/offloading/target_depend_nowait.cpp
new file mode 100644
index 0000000000000..2c1c7e7191882
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/target_depend_nowait.cpp
@@ -0,0 +1,62 @@
+// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu
+
+#include <omp.h>
+#include <stdio.h>
+
+#define N 1024
+
+int A[N];
+int B[N];
+int C[N];
+int main() {
+  for (int i = 0; i < N; i++)
+    A[i] = B[i] = i;
+
+#pragma omp parallel num_threads(2)
+  {
+    if (omp_get_thread_num() == 1) {
+// map data A & B and move to
+#pragma omp target enter data map(to : A, B) depend(out : A[0]) nowait
+
+// no data move since already mapped
+#pragma omp target map(A, B) depend(out : A[0]) nowait
+      {
+        for (int i = 0; i < N; i++)
+          ++A[i];
+        for (int i = 0; i < N; i++)
+          ++B[i];
+      }
+
+// no data move since already mapped
+#pragma omp target teams num_teams(1) map(A, B) depend(out : A[0]) nowait
+      {
+        for (int i = 0; i < N; i++)
+          ++A[i];
+        for (int i = 0; i < N; i++)
+          ++B[i];
+      }
+
+// A updated via update
+#pragma omp target update from(A) depend(out : A[0]) nowait
+
+// B updated via exit, A just released
+#pragma omp target exit data map(release                                       \
+                                 : A) map(from                                 \
+                                          : B) depend(out                      \
+                                                      : A[0]) nowait
+    } // if
+  }   // parallel
+
+  int Sum = 0;
+  for (int i = 0; i < N; i++)
+    Sum += A[i] + B[i];
+  // Sum is 2 * N * (2 + N - 1 + 2) / 2
+  // CHECK: Sum = 1051648.
+  printf("Sum = %d.\n", Sum);
+
+  return Sum != 2 * N * (2 + N - 1 + 2) / 2;
+}
+

From 63a0c2bce8e5731f90fc99b75d59f2e85283cb3b Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 16 Jul 2019 15:54:33 +0000
Subject: [PATCH 248/451] Revert "[swig] Add workaround for old swig"

With the deprecation of swig 1.x (r366213), this workaround should no
longer be necessary.

llvm-svn: 366221
---
 lldb/scripts/interface/SBTypeCategory.i | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/lldb/scripts/interface/SBTypeCategory.i b/lldb/scripts/interface/SBTypeCategory.i
index a881c1590fd8a..c183862702df9 100644
--- a/lldb/scripts/interface/SBTypeCategory.i
+++ b/lldb/scripts/interface/SBTypeCategory.i
@@ -213,11 +213,6 @@ namespace lldb {
             name = property(GetName, None)
             enabled = property(GetEnabled, SetEnabled)
         %}
-#if SWIG_VERSION < 0x030009
-        %pythoncode %{
-           __swig_setmethods__["enabled"] = SetEnabled
-        %}
-#endif
 
     };
 

From 450c62e33ea5310481b06d3fd59df911f5451ff2 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Tue, 16 Jul 2019 15:55:45 +0000
Subject: [PATCH 249/451] [Strict FP] Allow more relaxed scheduling

Reimplement scheduling constraints for strict FP instructions in
ScheduleDAGInstrs::buildSchedGraph to allow for more relaxed
scheduling.  Specifically, allow one strict FP instruction to
be scheduled across another, as long as it is not moved across
any global barrier.

Differential Revision: https://reviews.llvm.org/D64412

Reviewed By: cameron.mcinally

llvm-svn: 366222
---
 llvm/lib/CodeGen/ScheduleDAGInstrs.cpp        |  31 ++-
 llvm/test/CodeGen/SystemZ/fp-strict-alias.ll  | 222 ++++++++++++------
 .../vector-constrained-fp-intrinsics.ll       |  30 +--
 3 files changed, 186 insertions(+), 97 deletions(-)

diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index a538010889607..d5ad7e92299de 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -712,7 +712,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   AAForDep = UseAA ? AA : nullptr;
 
   BarrierChain = nullptr;
-  SUnit *FPBarrierChain = nullptr;
 
   this->TrackLaneMasks = TrackLaneMasks;
   MISUnitMap.clear();
@@ -744,6 +743,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   // done.
   Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
 
+  // Track all instructions that may raise floating-point exceptions.
+  // These do not depend on one another (or normal loads or stores), but
+  // must not be rescheduled across global barriers.  Note that we don't
+  // really need a "map" here since we don't track those MIs by value;
+  // using the same Value2SUsMap data type here is simply a matter of
+  // convenience.
+  Value2SUsMap FPExceptions;
+
   // Remove any stale debug info; sometimes BuildSchedGraph is called again
   // without emitting the info from the previous call.
   DbgValues.clear();
@@ -871,20 +878,24 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       addBarrierChain(Loads);
       addBarrierChain(NonAliasStores);
       addBarrierChain(NonAliasLoads);
-
-      // Add dependency against previous FP barrier and reset FP barrier.
-      if (FPBarrierChain)
-        FPBarrierChain->addPredBarrier(BarrierChain);
-      FPBarrierChain = BarrierChain;
+      addBarrierChain(FPExceptions);
 
       continue;
     }
 
-    // Instructions that may raise FP exceptions depend on each other.
+    // Instructions that may raise FP exceptions may not be moved
+    // across any global barriers.
     if (MI.mayRaiseFPException()) {
-      if (FPBarrierChain)
-        FPBarrierChain->addPredBarrier(SU);
-      FPBarrierChain = SU;
+      if (BarrierChain)
+        BarrierChain->addPredBarrier(SU);
+
+      FPExceptions.insert(SU, UnknownValue);
+
+      if (FPExceptions.size() >= HugeRegion) {
+        LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
+        Value2SUsMap empty;
+        reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
+      }
     }
 
     // If it's not a store or a variant load, we're done.
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll b/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll
index d8ee018b9e5b6..fe27b61c20bab 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll
@@ -2,138 +2,216 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
-declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
-declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
 declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
 declare float @llvm.sqrt.f32(float)
 declare void @llvm.s390.sfpc(i32)
 
-; For non-strict operations, we expect the post-RA scheduler to
-; separate the two square root instructions on z13.
-define void @f1(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; The basic assumption of all following tests is that on z13, we never
+; want to see two square root instructions directly in a row, so the
+; post-RA scheduler will always schedule something else in between
+; whenever possible.
+
+; We can move any FP operation across a (normal) store.
+
+define void @f1(float %f1, float %f2, float *%ptr1, float *%ptr2) {
 ; CHECK-LABEL: f1:
 ; CHECK: sqebr
-; CHECK: {{aebr|sebr}}
+; CHECK: ste
 ; CHECK: sqebr
+; CHECK: ste
 ; CHECK: br %r14
 
-  %add = fadd float %f1, %f2
-  %sub = fsub float %f3, %f4
-  %sqrt1 = call float @llvm.sqrt.f32(float %f2)
-  %sqrt2 = call float @llvm.sqrt.f32(float %f4)
-
-  %ptr1 = getelementptr float, float *%ptr0, i64 1
-  %ptr2 = getelementptr float, float *%ptr0, i64 2
-  %ptr3 = getelementptr float, float *%ptr0, i64 3
+  %sqrt1 = call float @llvm.sqrt.f32(float %f1)
+  %sqrt2 = call float @llvm.sqrt.f32(float %f2)
 
-  store float %add, float *%ptr0
-  store float %sub, float *%ptr1
-  store float %sqrt1, float *%ptr2
-  store float %sqrt2, float *%ptr3
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
 
   ret void
 }
 
-; But for strict operations, this must not happen.
-define void @f2(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+define void @f2(float %f1, float %f2, float *%ptr1, float *%ptr2) {
 ; CHECK-LABEL: f2:
-; CHECK: {{aebr|sebr}}
-; CHECK: {{aebr|sebr}}
 ; CHECK: sqebr
+; CHECK: ste
 ; CHECK: sqebr
+; CHECK: ste
 ; CHECK: br %r14
 
-  %add = call float @llvm.experimental.constrained.fadd.f32(
-                        float %f1, float %f2,
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
                         metadata !"round.dynamic",
-                        metadata !"fpexcept.strict")
-  %sub = call float @llvm.experimental.constrained.fsub.f32(
-                        float %f3, float %f4,
+                        metadata !"fpexcept.ignore")
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f2,
                         metadata !"round.dynamic",
-                        metadata !"fpexcept.strict")
+                        metadata !"fpexcept.ignore")
+
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
+
+  ret void
+}
+
+define void @f3(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f3:
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: br %r14
+
   %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
-                        float %f2,
+                        float %f1,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.strict")
   %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
-                        float %f4,
+                        float %f2,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.strict")
 
-  %ptr1 = getelementptr float, float *%ptr0, i64 1
-  %ptr2 = getelementptr float, float *%ptr0, i64 2
-  %ptr3 = getelementptr float, float *%ptr0, i64 3
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
 
-  store float %add, float *%ptr0
-  store float %sub, float *%ptr1
-  store float %sqrt1, float *%ptr2
-  store float %sqrt2, float *%ptr3
+  ret void
+}
+
+
+; We can move a non-strict FP operation or a fpexcept.ignore
+; operation even across a volatile store, but not a fpexcept.strict
+; operation.
+
+define void @f4(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f4:
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: br %r14
+
+  %sqrt1 = call float @llvm.sqrt.f32(float %f1)
+  %sqrt2 = call float @llvm.sqrt.f32(float %f2)
+
+  store volatile float %sqrt1, float *%ptr1
+  store volatile float %sqrt2, float *%ptr2
 
   ret void
 }
 
-; On the other hand, strict operations that use the fpexcept.ignore
-; exception behaviour should be scheduled freely.
-define void @f3(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
-; CHECK-LABEL: f3:
+define void @f5(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f5:
 ; CHECK: sqebr
-; CHECK: {{aebr|sebr}}
+; CHECK: ste
 ; CHECK: sqebr
+; CHECK: ste
 ; CHECK: br %r14
 
-  %add = call float @llvm.experimental.constrained.fadd.f32(
-                        float %f1, float %f2,
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.ignore")
-  %sub = call float @llvm.experimental.constrained.fsub.f32(
-                        float %f3, float %f4,
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f2,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.ignore")
+
+  store volatile float %sqrt1, float *%ptr1
+  store volatile float %sqrt2, float *%ptr2
+
+  ret void
+}
+
+define void @f6(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f6:
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: ste
+; CHECK: br %r14
+
   %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
                         float %f2,
                         metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  store volatile float %sqrt1, float *%ptr1
+  store volatile float %sqrt2, float *%ptr2
+
+  ret void
+}
+
+
+; No variant of FP operations can be scheduled across an SFPC.
+
+define void @f7(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f7:
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: ste
+; CHECK: br %r14
+
+  %sqrt1 = call float @llvm.sqrt.f32(float %f1)
+  %sqrt2 = call float @llvm.sqrt.f32(float %f2)
+
+  call void @llvm.s390.sfpc(i32 0)
+
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
+
+  ret void
+}
+
+define void @f8(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f8:
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: ste
+; CHECK: br %r14
+
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
                         metadata !"fpexcept.ignore")
   %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
-                        float %f4,
+                        float %f2,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.ignore")
 
-  %ptr1 = getelementptr float, float *%ptr0, i64 1
-  %ptr2 = getelementptr float, float *%ptr0, i64 2
-  %ptr3 = getelementptr float, float *%ptr0, i64 3
+  call void @llvm.s390.sfpc(i32 0)
 
-  store float %add, float *%ptr0
-  store float %sub, float *%ptr1
-  store float %sqrt1, float *%ptr2
-  store float %sqrt2, float *%ptr3
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
 
   ret void
 }
 
-; However, even non-strict operations must not be scheduled across an SFPC.
-define void @f4(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
-; CHECK-LABEL: f4:
-; CHECK: {{aebr|sebr}}
-; CHECK: {{aebr|sebr}}
-; CHECK: sfpc
+define void @f9(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f9:
 ; CHECK: sqebr
 ; CHECK: sqebr
+; CHECK: ste
+; CHECK: ste
 ; CHECK: br %r14
 
-  %add = fadd float %f1, %f2
-  %sub = fsub float %f3, %f4
-  call void @llvm.s390.sfpc(i32 0)
-  %sqrt1 = call float @llvm.sqrt.f32(float %f2)
-  %sqrt2 = call float @llvm.sqrt.f32(float %f4)
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
 
-  %ptr1 = getelementptr float, float *%ptr0, i64 1
-  %ptr2 = getelementptr float, float *%ptr0, i64 2
-  %ptr3 = getelementptr float, float *%ptr0, i64 3
+  call void @llvm.s390.sfpc(i32 0)
 
-  store float %add, float *%ptr0
-  store float %sub, float *%ptr1
-  store float %sqrt1, float *%ptr2
-  store float %sqrt2, float *%ptr3
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
 
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index ec1e6746a6168..8ab4c6db255ba 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -108,8 +108,8 @@ define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) {
 ; S390X-NEXT:    ldeb %f3, 0(%r1)
 ; S390X-NEXT:    larl %r1, .LCPI3_2
 ; S390X-NEXT:    ldeb %f4, 0(%r1)
-; S390X-NEXT:    ddb %f2, 0(%r2)
 ; S390X-NEXT:    ddbr %f3, %f1
+; S390X-NEXT:    ddb %f2, 0(%r2)
 ; S390X-NEXT:    ddbr %f4, %f0
 ; S390X-NEXT:    std %f4, 16(%r2)
 ; S390X-NEXT:    std %f3, 8(%r2)
@@ -659,16 +659,16 @@ entry:
 define void @constrained_vector_fmul_v3f64(<3 x double>* %a) {
 ; S390X-LABEL: constrained_vector_fmul_v3f64:
 ; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 8(%r2)
 ; S390X-NEXT:    larl %r1, .LCPI13_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    ld %f1, 8(%r2)
+; S390X-NEXT:    ld %f1, 0(%r1)
 ; S390X-NEXT:    ld %f2, 16(%r2)
-; S390X-NEXT:    ldr %f3, %f0
+; S390X-NEXT:    mdbr %f0, %f1
+; S390X-NEXT:    ldr %f3, %f1
 ; S390X-NEXT:    mdb %f3, 0(%r2)
-; S390X-NEXT:    mdbr %f1, %f0
-; S390X-NEXT:    mdbr %f2, %f0
+; S390X-NEXT:    mdbr %f2, %f1
 ; S390X-NEXT:    std %f2, 16(%r2)
-; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f0, 8(%r2)
 ; S390X-NEXT:    std %f3, 0(%r2)
 ; S390X-NEXT:    br %r14
 ;
@@ -832,16 +832,16 @@ entry:
 define void @constrained_vector_fadd_v3f64(<3 x double>* %a) {
 ; S390X-LABEL: constrained_vector_fadd_v3f64:
 ; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 8(%r2)
 ; S390X-NEXT:    larl %r1, .LCPI18_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    ld %f1, 8(%r2)
+; S390X-NEXT:    ld %f1, 0(%r1)
 ; S390X-NEXT:    ld %f2, 16(%r2)
-; S390X-NEXT:    ldr %f3, %f0
+; S390X-NEXT:    adbr %f0, %f1
+; S390X-NEXT:    ldr %f3, %f1
 ; S390X-NEXT:    adb %f3, 0(%r2)
-; S390X-NEXT:    adbr %f1, %f0
-; S390X-NEXT:    adbr %f2, %f0
+; S390X-NEXT:    adbr %f2, %f1
 ; S390X-NEXT:    std %f2, 16(%r2)
-; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f0, 8(%r2)
 ; S390X-NEXT:    std %f3, 0(%r2)
 ; S390X-NEXT:    br %r14
 ;
@@ -969,14 +969,14 @@ define <3 x float> @constrained_vector_fsub_v3f32() {
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    larl %r1, .LCPI22_0
 ; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    lzer %f1
 ; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    sebr %f4, %f1
 ; S390X-NEXT:    larl %r1, .LCPI22_1
 ; S390X-NEXT:    ler %f2, %f0
 ; S390X-NEXT:    seb %f2, 0(%r1)
 ; S390X-NEXT:    larl %r1, .LCPI22_2
 ; S390X-NEXT:    seb %f0, 0(%r1)
+; S390X-NEXT:    lzer %f1
+; S390X-NEXT:    sebr %f4, %f1
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_fsub_v3f32:

From b3f967d411358dcf54a8085df1d9097f5ab8b6d2 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 16 Jul 2019 15:57:12 +0000
Subject: [PATCH 250/451] [AMDGPU] Add the adjusted FP as a livein register.

Reviewers: arsenm, rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64145

llvm-svn: 366223
---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 52 ++++++++++---------
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    | 21 +++++---
 llvm/lib/Target/AMDGPU/SIFrameLowering.h      |  2 +-
 .../AMDGPU/frame-lowering-fp-adjusted.mir     | 50 ++++++++++++++++++
 4 files changed, 91 insertions(+), 34 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 672e49184a501..0ea8db04c2985 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1067,15 +1067,15 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
 
   auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
                                    const TargetRegisterClass &RC,
-                                   ArgDescriptor &Arg) {
+                                   ArgDescriptor &Arg, unsigned UserSGPRs,
+                                   unsigned SystemSGPRs) {
     // Skip parsing if it's not present.
     if (!A)
       return false;
 
     if (A->IsRegister) {
       unsigned Reg;
-      if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value,
-                                      Error)) {
+      if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
         SourceRange = A->RegisterName.SourceRange;
         return true;
       }
@@ -1088,60 +1088,62 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
     if (A->Mask)
       Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
 
+    MFI->NumUserSGPRs += UserSGPRs;
+    MFI->NumSystemSGPRs += SystemSGPRs;
     return false;
   };
 
   if (YamlMFI.ArgInfo &&
       (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
                              AMDGPU::SReg_128RegClass,
-                             MFI->ArgInfo.PrivateSegmentBuffer) ||
+                             MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
-                             AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.DispatchPtr) ||
+                             AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
+                             2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.QueuePtr) ||
+                             MFI->ArgInfo.QueuePtr, 2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
                              AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.KernargSegmentPtr) ||
+                             MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
-                             AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.DispatchID) ||
+                             AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
+                             2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
                              AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.FlatScratchInit) ||
+                             MFI->ArgInfo.FlatScratchInit, 2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
                              AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.PrivateSegmentSize) ||
+                             MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
-                             AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.WorkGroupIDX) ||
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
+                             0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
-                             AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.WorkGroupIDY) ||
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
+                             0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
-                             AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.WorkGroupIDZ) ||
+                             AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
+                             0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
                              AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.WorkGroupInfo) ||
+                             MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
                              AMDGPU::SGPR_32RegClass,
-                             MFI->ArgInfo.PrivateSegmentWaveByteOffset) ||
+                             MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
                              AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.ImplicitArgPtr) ||
+                             MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
                              AMDGPU::SReg_64RegClass,
-                             MFI->ArgInfo.ImplicitBufferPtr) ||
+                             MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
                              AMDGPU::VGPR_32RegClass,
-                             MFI->ArgInfo.WorkItemIDX) ||
+                             MFI->ArgInfo.WorkItemIDX, 0, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
                              AMDGPU::VGPR_32RegClass,
-                             MFI->ArgInfo.WorkItemIDY) ||
+                             MFI->ArgInfo.WorkItemIDY, 0, 0) ||
        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
                              AMDGPU::VGPR_32RegClass,
-                             MFI->ArgInfo.WorkItemIDZ)))
+                             MFI->ArgInfo.WorkItemIDZ, 0, 0)))
     return true;
 
   MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 44647d8ba871f..feab6bed2603f 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -311,7 +311,8 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
 }
 
 // Shift down registers reserved for the scratch wave offset.
-unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+std::pair<unsigned, bool>
+SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
     const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
     SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -322,17 +323,17 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
   // No replacement necessary.
   if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
       (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
-    return AMDGPU::NoRegister;
+    return std::make_pair(AMDGPU::NoRegister, false);
   }
 
   if (ST.hasSGPRInitBug())
-    return ScratchWaveOffsetReg;
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
 
   ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
   if (NumPreloaded > AllSGPRs.size())
-    return ScratchWaveOffsetReg;
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   AllSGPRs = AllSGPRs.slice(NumPreloaded);
 
@@ -353,10 +354,11 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
   unsigned ReservedRegCount = 13;
 
   if (AllSGPRs.size() < ReservedRegCount)
-    return ScratchWaveOffsetReg;
+    return std::make_pair(ScratchWaveOffsetReg, false);
 
   bool HandledScratchWaveOffsetReg =
     ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+  bool FPAdjusted = false;
 
   for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
     // Pick the first unallocated SGPR. Be careful not to pick an alias of the
@@ -374,12 +376,13 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
         MFI->setScratchWaveOffsetReg(Reg);
         MFI->setFrameOffsetReg(Reg);
         ScratchWaveOffsetReg = Reg;
+        FPAdjusted = true;
         break;
       }
     }
   }
 
-  return ScratchWaveOffsetReg;
+  return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
 }
 
 void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
@@ -415,7 +418,9 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
   unsigned ScratchRsrcReg
     = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
 
-  unsigned ScratchWaveOffsetReg =
+  unsigned ScratchWaveOffsetReg;
+  bool FPAdjusted;
+  std::tie(ScratchWaveOffsetReg, FPAdjusted) =
       getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
 
   // We need to insert initialization of the scratch resource descriptor.
@@ -453,7 +458,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
     if (&OtherBB == &MBB)
       continue;
 
-    if (OffsetRegUsed)
+    if (OffsetRegUsed || FPAdjusted)
       OtherBB.addLiveIn(ScratchWaveOffsetReg);
 
     if (ResourceRegUsed)
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 1954328714899..c644f4726e2ce 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -66,7 +66,7 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
     SIMachineFunctionInfo *MFI,
     MachineFunction &MF) const;
 
-  unsigned getReservedPrivateSegmentWaveByteOffsetReg(
+  std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg(
       const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
       SIMachineFunctionInfo *MFI, MachineFunction &MF) const;
 
diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
new file mode 100644
index 0000000000000..348559fbd0daf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
@@ -0,0 +1,50 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+
+
+# CHECK-LABEL: name: foo
+# CHECK: BUFFER_STORE_DWORD_OFFSET
+--- |
+
+  define amdgpu_kernel void @foo() #0 {
+    ret void
+  }
+
+  attributes #0 = {  "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
+...
+---
+name:            foo
+tracksRegLiveness: true
+liveins:         
+  - { reg: '$vgpr0' }
+  - { reg: '$sgpr4_sgpr5' }
+  - { reg: '$sgpr6_sgpr7' }
+  - { reg: '$sgpr8' }
+frameInfo:       
+  maxAlignment:    4
+stack:           
+  - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo: 
+  explicitKernArgSize: 660
+  maxKernArgAlign: 4
+  isEntryFunction: true
+  waveLimiter:     true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  scratchWaveOffsetReg: '$sgpr101'
+  frameOffsetReg:  '$sgpr101'
+  stackPtrOffsetReg: '$sgpr32'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
+    kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
+    workGroupIDX:    { reg: '$sgpr8' }
+    privateSegmentWaveByteOffset: { reg: '$sgpr9' }
+body:             |
+  bb.0:
+    successors: %bb.1
+    liveins: $sgpr8, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7
+  
+  bb.1:
+    liveins: $sgpr4, $sgpr5, $sgpr9, $sgpr22, $vgpr0, $sgpr6_sgpr7
+
+    renamable $vgpr2 = IMPLICIT_DEF
+    SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)

From 85b9651edd53d7bfb8d3076a79992450a787ec6d Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Tue, 16 Jul 2019 16:07:10 +0000
Subject: [PATCH 251/451] [OPENMP][NVPTX]Fixed checks for cuda versions.

Summary:
We used the CUDART_VERSION macro to check for the installed CUDA version,
but this macro is defined in cuda_runtime_api.h, which is not used by the
project. It is better to use the CUDA_VERSION macro, which is defined in
cuda.h. Also added a check that this macro is defined: if it is undefined,
something is wrong with the CUDA configuration and we should not continue
the compilation.
This also fixes problems with building the runtime with CUDA 10+.

Reviewers: grokos

Subscribers: guansong, jdoerfert, caomhin, kkwli0, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D64648

llvm-svn: 366224
---
 openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 646417d03cf87..f28284ded6b66 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -48,7 +48,9 @@
 // Macros for Cuda intrinsics
 // In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
 // Also, __ballot(1) in Cuda 8.0 is replaced with __activemask().
-#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000
+#ifndef CUDA_VERSION
+#error CUDA_VERSION macro is undefined, something wrong with cuda.
+#elif CUDA_VERSION >= 9000
 #define __SHFL_SYNC(mask, var, srcLane) __shfl_sync((mask), (var), (srcLane))
 #define __SHFL_DOWN_SYNC(mask, var, delta, width)                              \
   __shfl_down_sync((mask), (var), (delta), (width))
@@ -58,7 +60,7 @@
 #define __SHFL_DOWN_SYNC(mask, var, delta, width)                              \
   __shfl_down((var), (delta), (width))
 #define __ACTIVEMASK() __ballot(1)
-#endif
+#endif // CUDA_VERSION
 
 #define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
 // Use original __syncthreads if compiled by nvcc or clang >= 9.0.

From e6e33cf9f5682570e6d8d362001214d12b35b599 Mon Sep 17 00:00:00 2001
From: Julian Lettner <jlettner@apple.com>
Date: Tue, 16 Jul 2019 16:22:04 +0000
Subject: [PATCH 252/451] [TSan] Improve handling of stack pointer mangling in
 {set,long}jmp, pt.10

Remove now-unused assembly code for determining xor key on
Linux/AArch64. This is the final commit of this refactoring.

llvm-svn: 366225
---
 .../lib/tsan/rtl/tsan_platform_linux.cc       |  5 --
 compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S   | 59 -------------------
 2 files changed, 64 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
index c387416c20dd7..ec8606f65d5c2 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc
@@ -67,11 +67,6 @@ extern "C" void *__libc_stack_end;
 void *__libc_stack_end = 0;
 #endif
 
-#if SANITIZER_LINUX && defined(__aarch64__)
-__tsan::uptr InitializeGuardPtr() __attribute__((visibility("hidden")));
-extern "C" __tsan::uptr _tsan_pointer_chk_guard;
-#endif
-
 #if SANITIZER_LINUX && defined(__aarch64__) && !SANITIZER_GO
 # define INIT_LONGJMP_XOR_KEY 1
 #else
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
index 55487bd770538..c35897d3c3624 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
@@ -3,14 +3,6 @@
 
 #include "sanitizer_common/sanitizer_asm.h"
 
-#if !defined(__APPLE__)
-.section .bss
-.type	__tsan_pointer_chk_guard, %object
-ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__tsan_pointer_chk_guard))
-__tsan_pointer_chk_guard:
-.zero	8
-#endif
-
 #if defined(__APPLE__)
 .align  2
 
@@ -40,57 +32,6 @@ _sigsetjmp$non_lazy_ptr:
 .align 3
 #endif
 
-#if !defined(__APPLE__)
-// GLIBC mangles the function pointers in jmp_buf (used in {set,long}*jmp
-// functions) by XORing them with a random guard pointer.  For AArch64 it is a
-// global variable rather than a TCB one (as for x86_64/powerpc) and althought
-// its value is exported by the loader, it lies within a private GLIBC
-// namespace (meaning it should be only used by GLIBC itself and the ABI is
-// not stable). So InitializeGuardPtr obtains the pointer guard value by
-// issuing a setjmp and checking the resulting pointers values against the
-// original ones.
-ASM_HIDDEN(_Z18InitializeGuardPtrv)
-.global _Z18InitializeGuardPtrv
-ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_Z18InitializeGuardPtrv))
-_Z18InitializeGuardPtrv:
-  CFI_STARTPROC
-  // Allocates a jmp_buf for the setjmp call.
-  stp	x29, x30, [sp, -336]!
-  CFI_DEF_CFA_OFFSET (336)
-  CFI_OFFSET (29, -336)
-  CFI_OFFSET (30, -328)
-  add	x29, sp, 0
-  CFI_DEF_CFA_REGISTER (29)
-  add	x0, x29, 24
-
-  // Call libc setjmp that mangle the stack pointer value
-  adrp  x1, :got:_ZN14__interception12real__setjmpE
-  ldr   x1, [x1, #:got_lo12:_ZN14__interception12real__setjmpE]
-  ldr   x1, [x1]
-  blr   x1
-
-  // glibc setjmp mangles both the frame pointer (FP, pc+4 on blr) and the
-  // stack pointer (SP). FP will be placed on ((uintptr*)jmp_buf)[11] and
-  // SP at ((uintptr*)jmp_buf)[13].
-  // The mangle operation is just 'value' xor 'pointer guard value' and
-  // if we know the original value (SP) and the expected one, we can derive
-  // the guard pointer value.
-  mov	x0, sp
-
-  // Loads the mangled SP pointer.
-  ldr	x1, [x29, 128]
-  eor	x0, x0, x1
-  adrp	x2, __tsan_pointer_chk_guard
-  str	x0, [x2, #:lo12:__tsan_pointer_chk_guard]
-  ldp	x29, x30, [sp], 336
-  CFI_RESTORE (30)
-  CFI_RESTORE (19)
-  CFI_DEF_CFA (31, 0)
-  ret
-  CFI_ENDPROC
-ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_Z18InitializeGuardPtrv))
-#endif
-
 ASM_HIDDEN(__tsan_setjmp)
 .comm _ZN14__interception11real_setjmpE,8,8
 .globl ASM_SYMBOL_INTERCEPTOR(setjmp)

From e8ced86debe6cbf5d998796b0b969a782c9d5cba Mon Sep 17 00:00:00 2001
From: Stefan Granitz <stefan.graenitz@gmail.com>
Date: Tue, 16 Jul 2019 16:57:45 +0000
Subject: [PATCH 253/451] [CMake] Add Apple-lldb-Xcode.cmake cache that avoids
 install options

llvm-svn: 366226
---
 lldb/cmake/caches/Apple-lldb-Xcode.cmake | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 lldb/cmake/caches/Apple-lldb-Xcode.cmake

diff --git a/lldb/cmake/caches/Apple-lldb-Xcode.cmake b/lldb/cmake/caches/Apple-lldb-Xcode.cmake
new file mode 100644
index 0000000000000..6602b07f7868b
--- /dev/null
+++ b/lldb/cmake/caches/Apple-lldb-Xcode.cmake
@@ -0,0 +1,4 @@
+include(${CMAKE_CURRENT_LIST_DIR}/Apple-lldb-base.cmake)
+
+set(LLDB_BUILD_FRAMEWORK ON CACHE BOOL "")
+set(CMAKE_OSX_DEPLOYMENT_TARGET 10.11 CACHE STRING "")

From 97b4d7a8e14f37319676f750fa57d68eb09c0e16 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Tue, 16 Jul 2019 17:13:02 +0000
Subject: [PATCH 254/451] Removed -mno-omit-leaf-frame-pointer from flags.

Removes -mno-omit-leaf-frame-pointer from Scudo and GWP-ASan's CFlags in an
attempt to fix the sanitizer buildbots.

llvm-svn: 366228
---
 compiler-rt/lib/gwp_asan/CMakeLists.txt | 2 --
 compiler-rt/lib/scudo/CMakeLists.txt    | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/compiler-rt/lib/gwp_asan/CMakeLists.txt b/compiler-rt/lib/gwp_asan/CMakeLists.txt
index 952acb1830411..94c5336ce1b3b 100644
--- a/compiler-rt/lib/gwp_asan/CMakeLists.txt
+++ b/compiler-rt/lib/gwp_asan/CMakeLists.txt
@@ -25,8 +25,6 @@ set(GWP_ASAN_CFLAGS -fno-rtti -fno-exceptions -nostdinc++ -pthread)
 append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC GWP_ASAN_CFLAGS)
 append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fno-omit-frame-pointer
                GWP_ASAN_CFLAGS)
-append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG
-               -mno-omit-leaf-frame-pointer GWP_ASAN_CFLAGS)
 
 # Remove -stdlib= which is unused when passing -nostdinc++.
 string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
diff --git a/compiler-rt/lib/scudo/CMakeLists.txt b/compiler-rt/lib/scudo/CMakeLists.txt
index 9ee615c787de7..2a560b8fcb7f0 100644
--- a/compiler-rt/lib/scudo/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/CMakeLists.txt
@@ -14,10 +14,6 @@ append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread SCUDO_MINIMAL_DYNAMIC_LIBS)
 append_list_if(COMPILER_RT_HAS_LIBLOG log SCUDO_MINIMAL_DYNAMIC_LIBS)
 append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fno-omit-frame-pointer
                SCUDO_CFLAGS)
-if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
-  append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG
-                 -mno-omit-leaf-frame-pointer SCUDO_CFLAGS)
-endif()
 
 set(SCUDO_DYNAMIC_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS})
 # Use gc-sections by default to avoid unused code being pulled in.

From 1ff553578551766801cbd25dd2c659dc8594b96b Mon Sep 17 00:00:00 2001
From: Jonas Hahnfeld <hahnjo@hahnjo.de>
Date: Tue, 16 Jul 2019 17:16:43 +0000
Subject: [PATCH 255/451] [OpenMP] Move header inclusion out of 'extern "C"'

Including headers inside 'extern "C"' leads to problems when compiling C++
code with libc++ for Nvidia GPUs, because Clang now uses wrappers for math
functions that might include C++ templates, which are not allowed in
'extern "C"'.

Differential Revision: https://reviews.llvm.org/D64625

llvm-svn: 366229
---
 openmp/runtime/src/include/omp.h.var | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index 54ba1f545b472..2246e7012bee2 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -15,6 +15,9 @@
 #ifndef __OMP_H
 #   define __OMP_H
 
+#   include <stdlib.h>
+#   include <stdint.h>
+
 #   define KMP_VERSION_MAJOR    @LIBOMP_VERSION_MAJOR@
 #   define KMP_VERSION_MINOR    @LIBOMP_VERSION_MINOR@
 #   define KMP_VERSION_BUILD    @LIBOMP_VERSION_BUILD@
@@ -134,8 +137,6 @@
     extern int  __KAI_KMPC_CONVENTION  omp_get_team_num (void);
     extern int  __KAI_KMPC_CONVENTION  omp_get_cancellation (void);
 
-#   include <stdlib.h>
-#   include <stdint.h>
     /* OpenMP 4.5 */
     extern int   __KAI_KMPC_CONVENTION  omp_get_initial_device (void);
     extern void* __KAI_KMPC_CONVENTION  omp_target_alloc(size_t, int);

From c65a9db43e17f0acdd39b76498d1c23e4a70f9a1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 17:22:21 +0000
Subject: [PATCH 256/451] AMDGPU: Fix missing immarg for mfma intrinsics

llvm-svn: 366230
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      | 60 ++++++++++++-------
 llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll | 23 +++++++
 2 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 1cde3afd69e1d..bad4216173d0a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1675,83 +1675,103 @@ def int_amdgcn_global_atomic_fadd    : AMDGPUGlobalAtomicNoRtn;
 // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
 def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty],
   [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty],
   [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty],
   [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty],
   [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty],
   [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty],
   [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty],
   [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty],
   [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty],
   [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty],
   [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty],
   [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty],
   [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty],
   [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty],
   [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty],
   [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
-   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
+   llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+   [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>;
 
 //===----------------------------------------------------------------------===//
 // Special Intrinsics for backend internal use only. No frontend
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll
index a72d1da68a212..76098385b6a23 100644
--- a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll
@@ -674,3 +674,26 @@ define void @test_interp_p2_f16(float %arg0, float %arg1, i32 %arg2, i32 %arg3,
 
   ret void
 }
+
+declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32)
+define void @test_mfma_f32_32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 %arg4, i32 %arg5) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg3
+  ; CHECK-NEXT: %val0 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 2, i32 3)
+  %val0 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 2, i32 3)
+  store volatile <32 x i32> %val0, <32 x i32> addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg4
+  ; CHECK-NEXT: %val1 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 %arg4, i32 3)
+  %val1 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 %arg4, i32 3)
+  store volatile <32 x i32> %val1, <32 x i32> addrspace(1)* undef
+
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg5
+  ; CHECK-NEXT: %val2 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 2, i32 %arg5)
+  %val2 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 2, i32 %arg5)
+  store volatile <32 x i32> %val2, <32 x i32> addrspace(1)* undef
+
+  ret void
+}

From 4754814c5a68cf609f83b491feb38ad53c5acd4e Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 16 Jul 2019 17:24:33 +0000
Subject: [PATCH 257/451] fix unnamed bitfield issue and add tests for
 __builtin_preserve_access_index intrinsic

The original commit is r366076. It was temporarily reverted (r366155)
due to a test failure. This resubmission makes the tests more robust by
accepting regexes instead of hardcoded names/references in several places.

This is a followup patch for https://reviews.llvm.org/D61809.
Handle unnamed bitfield properly and add more test cases.

Fixed the unnamed bitfield issue. The unnamed bitfield is ignored
by debug info, so we need to ignore such a struct/union member
when we try to get the member index in the debug info.

D61809 contains two test cases, but they are not enough: they do
not check the generated IR at a fine-grained level, and there are
no semantic-checking tests.
This patch adds unit tests for both code generation and semantic checking
for the new intrinsic.

Signed-off-by: Yonghong Song <yhs@fb.com>
llvm-svn: 366231
---
 clang/lib/CodeGen/CGExpr.cpp                  |  21 ++-
 clang/lib/CodeGen/CodeGenFunction.h           |   3 +
 .../CodeGen/builtin-preserve-access-index.c   | 177 ++++++++++++++++++
 .../test/Sema/builtin-preserve-access-index.c |  13 ++
 4 files changed, 212 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/builtin-preserve-access-index.c
 create mode 100644 clang/test/Sema/builtin-preserve-access-index.c

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 4d19a12e5cb05..5a4b1188b7114 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3892,6 +3892,23 @@ LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) {
   return EmitLValueForField(LambdaLV, Field);
 }
 
+/// Get the field index in the debug info. The debug info structure/union
+/// will ignore the unnamed bitfields.
+unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec,
+                                             unsigned FieldIndex) {
+  unsigned I = 0, Skipped = 0;
+
+  for (auto F : Rec->getDefinition()->fields()) {
+    if (I == FieldIndex)
+      break;
+    if (F->isUnnamedBitfield())
+      Skipped++;
+    I++;
+  }
+
+  return FieldIndex - Skipped;
+}
+
 /// Get the address of a zero-sized field within a record. The resulting
 /// address doesn't necessarily have the right type.
 static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base,
@@ -3931,7 +3948,7 @@ static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base,
       CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
 
   return CGF.Builder.CreatePreserveStructAccessIndex(
-      base, idx, field->getFieldIndex(), DbgInfo);
+      base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo);
 }
 
 static bool hasAnyVptr(const QualType Type, const ASTContext &Context) {
@@ -4048,7 +4065,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
           getContext().getRecordType(rec), rec->getLocation());
       addr = Address(
           Builder.CreatePreserveUnionAccessIndex(
-              addr.getPointer(), field->getFieldIndex(), DbgInfo),
+              addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo),
           addr.getAlignment());
     }
   } else {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index bd9e14206a09e..06ef2dff7e9f5 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2652,6 +2652,9 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Converts Location to a DebugLoc, if debug information is enabled.
   llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location);
 
+  /// Get the record field index as represented in debug info.
+  unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
+
 
   //===--------------------------------------------------------------------===//
   //                            Declaration Emission
diff --git a/clang/test/CodeGen/builtin-preserve-access-index.c b/clang/test/CodeGen/builtin-preserve-access-index.c
new file mode 100644
index 0000000000000..954a3b827d25a
--- /dev/null
+++ b/clang/test/CodeGen/builtin-preserve-access-index.c
@@ -0,0 +1,177 @@
+// RUN: %clang -target x86_64 -emit-llvm -S -g %s -o - | FileCheck %s
+
+#define _(x) (__builtin_preserve_access_index(x))
+
+const void *unit1(const void *arg) {
+  return _(arg);
+}
+// CHECK: define dso_local i8* @unit1
+// CHECK-NOT: llvm.preserve.array.access.index
+// CHECK-NOT: llvm.preserve.struct.access.index
+// CHECK-NOT: llvm.preserve.union.access.index
+
+const void *unit2(void) {
+  return _((const void *)0xffffffffFFFF0000ULL);
+}
+// CHECK: define dso_local i8* @unit2
+// CHECK-NOT: llvm.preserve.array.access.index
+// CHECK-NOT: llvm.preserve.struct.access.index
+// CHECK-NOT: llvm.preserve.union.access.index
+
+const void *unit3(const int *arg) {
+  return _(arg + 1);
+}
+// CHECK: define dso_local i8* @unit3
+// CHECK-NOT: llvm.preserve.array.access.index
+// CHECK-NOT: llvm.preserve.struct.access.index
+// CHECK-NOT: llvm.preserve.union.access.index
+
+const void *unit4(const int *arg) {
+  return _(&arg[1]);
+}
+// CHECK: define dso_local i8* @unit4
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %{{[0-9a-z]+}}, i32 0, i32 1)
+
+const void *unit5(const int *arg[5]) {
+  return _(&arg[1][2]);
+}
+// CHECK: define dso_local i8* @unit5
+// CHECK-NOT: getelementptr
+// CHECK: call i32** @llvm.preserve.array.access.index.p0p0i32.p0p0i32(i32** %{{[0-9a-z]+}}, i32 0, i32 1)
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %{{[0-9a-z]+}}, i32 0, i32 2)
+
+struct s1 {
+  char a;
+  int b;
+};
+
+struct s2 {
+  char a1:1;
+  char a2:1;
+  int b;
+};
+
+struct s3 {
+  char a1:1;
+  char a2:1;
+  char :6;
+  int b;
+};
+
+const void *unit6(struct s1 *arg) {
+  return _(&arg->a);
+}
+// CHECK: define dso_local i8* @unit6
+// CHECK-NOT: getelementptr
+// CHECK: call i8* @llvm.preserve.struct.access.index.p0i8.p0s_struct.s1s(%struct.s1* %{{[0-9a-z]+}}, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1:[0-9]+]]
+
+const void *unit7(struct s1 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit7
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s1s(%struct.s1* %{{[0-9a-z]+}}, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1]]
+
+const void *unit8(struct s2 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit8
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s2s(%struct.s2* %{{[0-9a-z]+}}, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S2:[0-9]+]]
+
+const void *unit9(struct s3 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit9
+// CHECK-NOT: getelementptr
+// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s3s(%struct.s3* %{{[0-9a-z]+}}, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S3:[0-9]+]]
+
+union u1 {
+  char a;
+  int b;
+};
+
+union u2 {
+  char a;
+  int :32;
+  int b;
+};
+
+const void *unit10(union u1 *arg) {
+  return _(&arg->a);
+}
+// CHECK: define dso_local i8* @unit10
+// CHECK-NOT: getelementptr
+// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %{{[0-9a-z]+}}, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1:[0-9]+]]
+
+const void *unit11(union u1 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit11
+// CHECK-NOT: getelementptr
+// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %{{[0-9a-z]+}}, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1]]
+
+const void *unit12(union u2 *arg) {
+  return _(&arg->b);
+}
+// CHECK: define dso_local i8* @unit12
+// CHECK-NOT: getelementptr
+// CHECK: call %union.u2* @llvm.preserve.union.access.index.p0s_union.u2s.p0s_union.u2s(%union.u2* %{{[0-9a-z]+}}, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U2:[0-9]+]]
+
+struct s4 {
+  char d;
+  union u {
+    int b[4];
+    char a;
+  } c;
+};
+
+union u3 {
+  struct s {
+    int b[4];
+  } c;
+  char a;
+};
+
+const void *unit13(struct s4 *arg) {
+  return _(&arg->c.b[2]);
+}
+// CHECK: define dso_local i8* @unit13
+// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %{{[0-9a-z]+}}, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4:[0-9]+]]
+// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %{{[0-9a-z]+}}, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U:[0-9]+]]
+// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %{{[0-9a-z]+}}, i32 1, i32 2)
+
+const void *unit14(union u3 *arg) {
+  return _(&arg->c.b[2]);
+}
+// CHECK: define dso_local i8* @unit14
+// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %{{[0-9a-z]+}}, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3:[0-9]+]]
+// CHECK: call [4 x i32]* @llvm.preserve.struct.access.index.p0a4i32.p0s_struct.ss(%struct.s* %{{[0-9a-z]+}}, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_I_S:[0-9]+]]
+// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %{{[0-9a-z]+}}, i32 1, i32 2)
+
+const void *unit15(struct s4 *arg) {
+  return _(&arg[2].c.a);
+}
+// CHECK: define dso_local i8* @unit15
+// CHECK: call %struct.s4* @llvm.preserve.array.access.index.p0s_struct.s4s.p0s_struct.s4s(%struct.s4* %{{[0-9a-z]+}}, i32 0, i32 2)
+// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %{{[0-9a-z]+}}, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4]]
+// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %{{[0-9a-z]+}}, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U]]
+
+const void *unit16(union u3 *arg) {
+  return _(&arg[2].a);
+}
+// CHECK: define dso_local i8* @unit16
+// CHECK: call %union.u3* @llvm.preserve.array.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %{{[0-9a-z]+}}, i32 0, i32 2)
+// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %{{[0-9a-z]+}}, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3]]
+
+// CHECK: ![[STRUCT_S1]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s1"
+// CHECK: ![[STRUCT_S2]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s2"
+// CHECK: ![[STRUCT_S3]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s3"
+// CHECK: ![[UNION_U1]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u1"
+// CHECK: ![[UNION_U2]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u2"
+// CHECK: ![[STRUCT_S4]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s4"
+// CHECK: ![[UNION_I_U]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u"
+// CHECK: ![[UNION_U3]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u3"
+// CHECK: ![[STRUCT_I_S]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s"
diff --git a/clang/test/Sema/builtin-preserve-access-index.c b/clang/test/Sema/builtin-preserve-access-index.c
new file mode 100644
index 0000000000000..c10ceb5145b8c
--- /dev/null
+++ b/clang/test/Sema/builtin-preserve-access-index.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -x c -triple x86_64-pc-linux-gnu -dwarf-version=4 -fsyntax-only -verify %s
+
+const void *invalid1(const int *arg) {
+  return __builtin_preserve_access_index(&arg[1], 1); // expected-error {{too many arguments to function call, expected 1, have 2}}
+}
+
+void *invalid2(const int *arg) {
+  return __builtin_preserve_access_index(&arg[1]); // expected-warning {{returning 'const void *' from a function with result type 'void *' discards qualifiers}}
+}
+
+const void *invalid3(const int *arg) {
+  return __builtin_preserve_access_index(1); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const void *'}}
+}

From c26e27d802c146d08446eab0d47e1dd2be233506 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Tue, 16 Jul 2019 17:29:03 +0000
Subject: [PATCH 258/451] [NFC] Fix -Wreorder warning in TBB backend

llvm-svn: 366232
---
 pstl/include/pstl/internal/parallel_backend_tbb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pstl/include/pstl/internal/parallel_backend_tbb.h b/pstl/include/pstl/internal/parallel_backend_tbb.h
index 8ac385e3992db..6e7cb66077e6e 100644
--- a/pstl/include/pstl/internal/parallel_backend_tbb.h
+++ b/pstl/include/pstl/internal/parallel_backend_tbb.h
@@ -862,8 +862,8 @@ class __stable_sort_task : public tbb::task
     __stable_sort_task(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs,
                        bool __root, _Compare __comp, _LeafSort __leaf_sort, _SizeType __nsort,
                        _RandomAccessIterator1 __x_beg, _RandomAccessIterator2 __z_beg)
-        : _M_xs(__xs), _M_xe(__xe), _M_x_beg(__x_beg), _M_zs(__zs), _M_z_beg(__z_beg), _M_root(__root), _M_comp(__comp),
-          _M_leaf_sort(__leaf_sort), _M_nsort(__nsort)
+        : _M_xs(__xs), _M_xe(__xe), _M_x_beg(__x_beg), _M_zs(__zs), _M_z_beg(__z_beg), _M_comp(__comp),
+          _M_leaf_sort(__leaf_sort), _M_root(__root), _M_nsort(__nsort)
     {
     }
 };

From 3559fcd1149b339757b10365a4b24161b6ef71a5 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Tue, 16 Jul 2019 17:29:09 +0000
Subject: [PATCH 259/451] [pstl] Use std::transform_reduce instead of
 hand-rolled implementation

llvm-svn: 366233
---
 pstl/include/pstl/internal/numeric_impl.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pstl/include/pstl/internal/numeric_impl.h b/pstl/include/pstl/internal/numeric_impl.h
index 625bb9441c8a6..fe1f222d437e4 100644
--- a/pstl/include/pstl/internal/numeric_impl.h
+++ b/pstl/include/pstl/internal/numeric_impl.h
@@ -93,11 +93,7 @@ _Tp
 __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op,
                          _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept
 {
-    for (; __first != __last; ++__first)
-    {
-        __init = __binary_op(__init, __unary_op(*__first));
-    }
-    return __init;
+    return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op);
 }
 
 template <class _ForwardIterator, class _Tp, class _UnaryOperation, class _BinaryOperation>

From c6fd5abecc42fb56525cb56b62f95603863ae6a0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 17:38:50 +0000
Subject: [PATCH 260/451] AMDGPU: Redefine load PatFrags

Rewrite PatFrags using the new PatFrag address space matching in
tablegen. These will now work with both SelectionDAG and GlobalISel.

llvm-svn: 366234
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 167 +++++++++++--------
 llvm/lib/Target/AMDGPU/BUFInstructions.td    |   4 +-
 llvm/lib/Target/AMDGPU/FLATInstructions.td   |   8 +-
 llvm/lib/Target/AMDGPU/R600Instructions.td   |   2 +
 4 files changed, 105 insertions(+), 76 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 9e9510e0fa4a0..d470b3cd51486 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -11,6 +11,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+class AddressSpacesImpl {
+  int Flat = 0;
+  int Global = 1;
+  int Region = 2;
+  int Local = 3;
+  int Constant = 4;
+  int Private = 5;
+}
+
+def AddrSpaces : AddressSpacesImpl;
+
+
 class AMDGPUInst <dag outs, dag ins, string asm = "",
   list<dag> pattern = []> : Instruction {
   field bit isRegisterLoad = 0;
@@ -323,6 +335,10 @@ def TEX_SHADOW_ARRAY : PatLeaf<
 // Load/Store Pattern Fragments
 //===----------------------------------------------------------------------===//
 
+class AddressSpaceList<list<int> AS> {
+  list<int> AddrSpaces = AS;
+}
+
 class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
   return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
 }]>;
@@ -341,25 +357,25 @@ class StoreHi16<SDPatternOperator op> : PatFrag <
   (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)
 >;
 
-class PrivateAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
-}]>;
+def LoadAddress_constant : AddressSpaceList<[  AddrSpaces.Constant ]>;
+def LoadAddress_global : AddressSpaceList<[  AddrSpaces.Global, AddrSpaces.Constant ]>;
+def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;
 
-class ConstantAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
+def LoadAddress_flat : AddressSpaceList<[  AddrSpaces.Flat,
+                                           AddrSpaces.Global,
+                                           AddrSpaces.Constant ]>;
+def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;
 
-class LocalAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
+def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
+def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
+
+def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
+def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
+
+def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
+def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
 
-class RegionAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
-}]>;
 
-class GlobalAddress : CodePatPred<[{
-  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
 
 class GlobalLoadAddress : CodePatPred<[{
   auto AS = cast<MemSDNode>(N)->getAddressSpace();
@@ -373,37 +389,86 @@ class FlatLoadAddress : CodePatPred<[{
          AS == AMDGPUAS::CONSTANT_ADDRESS;
 }]>;
 
+class GlobalAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+}]>;
+
+class PrivateAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+}]>;
+
+class LocalAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+class RegionAddress : CodePatPred<[{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+}]>;
+
 class FlatStoreAddress : CodePatPred<[{
   const auto AS = cast<MemSDNode>(N)->getAddressSpace();
   return AS == AMDGPUAS::FLAT_ADDRESS ||
          AS == AMDGPUAS::GLOBAL_ADDRESS;
 }]>;
 
-class PrivateLoad <SDPatternOperator op> : LoadFrag <op>, PrivateAddress;
+// TODO: Remove these when stores are converted to the new PatFrag format.
 class PrivateStore <SDPatternOperator op> : StoreFrag <op>, PrivateAddress;
-
-class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
 class LocalStore <SDPatternOperator op> : StoreFrag <op>, LocalAddress;
-
-class RegionLoad <SDPatternOperator op> : LoadFrag <op>, RegionAddress;
 class RegionStore <SDPatternOperator op> : StoreFrag <op>, RegionAddress;
-
-class GlobalLoad <SDPatternOperator op> : LoadFrag<op>, GlobalLoadAddress;
 class GlobalStore <SDPatternOperator op> : StoreFrag<op>, GlobalAddress;
-
-class FlatLoad <SDPatternOperator op> : LoadFrag <op>, FlatLoadAddress;
 class FlatStore <SDPatternOperator op> : StoreFrag <op>, FlatStoreAddress;
 
-class ConstantLoad <SDPatternOperator op> : LoadFrag <op>, ConstantAddress;
 
+foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
+let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
+
+def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
+  let IsLoad = 1;
+  let IsNonExtLoad = 1;
+}
+
+def extloadi8_#as  : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
+
+def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
+
+def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
+
+def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
+
+def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i8;
+}
+
+def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
+  let IsLoad = 1;
+  let MemoryVT = i16;
+}
+
+def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i32;
+}
+
+def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i64;
+}
 
-def load_private : PrivateLoad <load>;
-def extloadi8_private : PrivateLoad <extloadi8>;
-def zextloadi8_private : PrivateLoad <zextloadi8>;
-def sextloadi8_private : PrivateLoad <sextloadi8>;
-def extloadi16_private : PrivateLoad <extloadi16>;
-def zextloadi16_private : PrivateLoad <zextloadi16>;
-def sextloadi16_private : PrivateLoad <sextloadi16>;
+} // End let AddressSpaces = ...
+} // End foreach AddrSpace
 
 def store_private : PrivateStore <store>;
 def truncstorei8_private : PrivateStore<truncstorei8>;
@@ -411,16 +476,6 @@ def truncstorei16_private : PrivateStore <truncstorei16>;
 def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
 def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
 
-
-def load_global : GlobalLoad <load>;
-def sextloadi8_global : GlobalLoad <sextloadi8>;
-def extloadi8_global : GlobalLoad <extloadi8>;
-def zextloadi8_global : GlobalLoad <zextloadi8>;
-def sextloadi16_global : GlobalLoad <sextloadi16>;
-def extloadi16_global : GlobalLoad <extloadi16>;
-def zextloadi16_global : GlobalLoad <zextloadi16>;
-def atomic_load_global : GlobalLoad<atomic_load>;
-
 def store_global : GlobalStore <store>;
 def truncstorei8_global : GlobalStore <truncstorei8>;
 def truncstorei16_global : GlobalStore <truncstorei16>;
@@ -428,16 +483,6 @@ def store_atomic_global : GlobalStore<atomic_store>;
 def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
 def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;
 
-def load_local : LocalLoad <load>;
-def extloadi8_local : LocalLoad <extloadi8>;
-def zextloadi8_local : LocalLoad <zextloadi8>;
-def sextloadi8_local : LocalLoad <sextloadi8>;
-def extloadi16_local : LocalLoad <extloadi16>;
-def zextloadi16_local : LocalLoad <zextloadi16>;
-def sextloadi16_local : LocalLoad <sextloadi16>;
-def atomic_load_32_local : LocalLoad<atomic_load_32>;
-def atomic_load_64_local : LocalLoad<atomic_load_64>;
-
 def store_local : LocalStore <store>;
 def truncstorei8_local : LocalStore <truncstorei8>;
 def truncstorei16_local : LocalStore <truncstorei16>;
@@ -461,15 +506,6 @@ def store_align16_local : Aligned16Bytes <
   (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
 >;
 
-def load_flat          : FlatLoad <load>;
-def extloadi8_flat  : FlatLoad <extloadi8>;
-def zextloadi8_flat  : FlatLoad <zextloadi8>;
-def sextloadi8_flat    : FlatLoad <sextloadi8>;
-def extloadi16_flat : FlatLoad <extloadi16>;
-def zextloadi16_flat : FlatLoad <zextloadi16>;
-def sextloadi16_flat   : FlatLoad <sextloadi16>;
-def atomic_load_flat   : FlatLoad<atomic_load>;
-
 def store_flat         : FlatStore <store>;
 def truncstorei8_flat  : FlatStore <truncstorei8>;
 def truncstorei16_flat : FlatStore <truncstorei16>;
@@ -478,15 +514,6 @@ def truncstorei8_hi16_flat  : StoreHi16<truncstorei8>, FlatStoreAddress;
 def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
 
 
-def constant_load : ConstantLoad<load>;
-def sextloadi8_constant : ConstantLoad <sextloadi8>;
-def extloadi8_constant : ConstantLoad <extloadi8>;
-def zextloadi8_constant : ConstantLoad <zextloadi8>;
-def sextloadi16_constant : ConstantLoad <sextloadi16>;
-def extloadi16_constant : ConstantLoad <extloadi16>;
-def zextloadi16_constant : ConstantLoad <zextloadi16>;
-
-
 class local_binary_atomic_op<SDNode atomic_op> :
   PatFrag<(ops node:$ptr, node:$value),
     (atomic_op node:$ptr, node:$value), [{
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 4ff9aeb2e314e..5b6c8a7ed96fc 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1445,8 +1445,8 @@ def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_const
 def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, extloadi16_constant>;
 def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, zextloadi16_constant>;
 
-defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, atomic_load_global>;
-defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, atomic_load_global>;
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, atomic_load_32_global>;
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, atomic_load_64_global>;
 } // End SubtargetPredicate = isGFX6GFX7
 
 multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 4070d94dd4ab2..9d541560613cc 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -782,8 +782,8 @@ def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
 
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_flat, i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_flat, i64>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
 
 def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
 def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
@@ -868,8 +868,8 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
 def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
 
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_global, i32>;
-def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_global, i64>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
+def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
 
 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
 def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index d3ce7ffd673c5..f40eece859ee7 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -296,6 +296,8 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
 }
 
 // FIXME: Deprecated.
+class LocalLoad <SDPatternOperator op> : LoadFrag <op>, LocalAddress;
+
 class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                               (ld_node node:$ptr), [{
   LoadSDNode *L = cast<LoadSDNode>(N);

From 17060f0a54b681b8c7cec2f9ab465f6a1e51d968 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@gmail.com>
Date: Tue, 16 Jul 2019 17:44:54 +0000
Subject: [PATCH 261/451] [AMDGPU] Optimize atomic max/min

Summary:
Extend the atomic optimizer to handle signed and unsigned max and min
operations, as well as add and subtract.

Reviewers: arsenm, sheredom, critson, rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64328

llvm-svn: 366235
---
 .../Target/AMDGPU/AMDGPUAtomicOptimizer.cpp   | 177 ++++++++++++++----
 .../atomic_optimizations_local_pointer.ll     | 108 +++++++++++
 2 files changed, 249 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 810861503be55..c65a49b7c5bc7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -40,7 +40,7 @@ enum DPP_CTRL {
 
 struct ReplacementInfo {
   Instruction *I;
-  Instruction::BinaryOps Op;
+  AtomicRMWInst::BinOp Op;
   unsigned ValIdx;
   bool ValDivergent;
 };
@@ -55,8 +55,8 @@ class AMDGPUAtomicOptimizer : public FunctionPass,
   bool HasDPP;
   bool IsPixelShader;
 
-  void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op,
-                      unsigned ValIdx, bool ValDivergent) const;
+  void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx,
+                      bool ValDivergent) const;
 
 public:
   static char ID;
@@ -120,16 +120,17 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
     break;
   }
 
-  Instruction::BinaryOps Op;
+  AtomicRMWInst::BinOp Op = I.getOperation();
 
-  switch (I.getOperation()) {
+  switch (Op) {
   default:
     return;
   case AtomicRMWInst::Add:
-    Op = Instruction::Add;
-    break;
   case AtomicRMWInst::Sub:
-    Op = Instruction::Sub;
+  case AtomicRMWInst::Max:
+  case AtomicRMWInst::Min:
+  case AtomicRMWInst::UMax:
+  case AtomicRMWInst::UMin:
     break;
   }
 
@@ -161,7 +162,7 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
 }
 
 void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
-  Instruction::BinaryOps Op;
+  AtomicRMWInst::BinOp Op;
 
   switch (I.getIntrinsicID()) {
   default:
@@ -169,12 +170,32 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
   case Intrinsic::amdgcn_buffer_atomic_add:
   case Intrinsic::amdgcn_struct_buffer_atomic_add:
   case Intrinsic::amdgcn_raw_buffer_atomic_add:
-    Op = Instruction::Add;
+    Op = AtomicRMWInst::Add;
     break;
   case Intrinsic::amdgcn_buffer_atomic_sub:
   case Intrinsic::amdgcn_struct_buffer_atomic_sub:
   case Intrinsic::amdgcn_raw_buffer_atomic_sub:
-    Op = Instruction::Sub;
+    Op = AtomicRMWInst::Sub;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_smin:
+  case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+  case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+    Op = AtomicRMWInst::Min;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_umin:
+  case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+  case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+    Op = AtomicRMWInst::UMin;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_smax:
+  case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+  case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+    Op = AtomicRMWInst::Max;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_umax:
+  case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+  case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+    Op = AtomicRMWInst::UMax;
     break;
   }
 
@@ -206,8 +227,57 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
   ToReplace.push_back(Info);
 }
 
+// Use the builder to create the non-atomic counterpart of the specified
+// atomicrmw binary op.
+static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
+                                  Value *LHS, Value *RHS) {
+  CmpInst::Predicate Pred;
+
+  switch (Op) {
+  default:
+    llvm_unreachable("Unhandled atomic op");
+  case AtomicRMWInst::Add:
+    return B.CreateBinOp(Instruction::Add, LHS, RHS);
+  case AtomicRMWInst::Sub:
+    return B.CreateBinOp(Instruction::Sub, LHS, RHS);
+
+  case AtomicRMWInst::Max:
+    Pred = CmpInst::ICMP_SGT;
+    break;
+  case AtomicRMWInst::Min:
+    Pred = CmpInst::ICMP_SLT;
+    break;
+  case AtomicRMWInst::UMax:
+    Pred = CmpInst::ICMP_UGT;
+    break;
+  case AtomicRMWInst::UMin:
+    Pred = CmpInst::ICMP_ULT;
+    break;
+  }
+  Value *Cond = B.CreateICmp(Pred, LHS, RHS);
+  return B.CreateSelect(Cond, LHS, RHS);
+}
+
+static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
+                                         unsigned BitWidth) {
+  switch (Op) {
+  default:
+    llvm_unreachable("Unhandled atomic op");
+  case AtomicRMWInst::Add:
+  case AtomicRMWInst::Sub:
+  case AtomicRMWInst::UMax:
+    return APInt::getMinValue(BitWidth);
+  case AtomicRMWInst::UMin:
+    return APInt::getMaxValue(BitWidth);
+  case AtomicRMWInst::Max:
+    return APInt::getSignedMinValue(BitWidth);
+  case AtomicRMWInst::Min:
+    return APInt::getSignedMaxValue(BitWidth);
+  }
+}
+
 void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
-                                           Instruction::BinaryOps Op,
+                                           AtomicRMWInst::BinOp Op,
                                            unsigned ValIdx,
                                            bool ValDivergent) const {
   // Start building just before the instruction.
@@ -266,16 +336,16 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
 
   Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false);
 
-  Value *LaneOffset = nullptr;
+  Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth));
+
+  Value *ExclScan = nullptr;
   Value *NewV = nullptr;
 
   // If we have a divergent value in each lane, we need to combine the value
   // using DPP.
   if (ValDivergent) {
-    Value *const Identity = B.getIntN(TyBitWidth, 0);
-
-    // First we need to set all inactive invocations to 0, so that they can
-    // correctly contribute to the final result.
+    // First we need to set all inactive invocations to the identity value, so
+    // that they can correctly contribute to the final result.
     CallInst *const SetInactive =
         B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
 
@@ -283,7 +353,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
         B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty,
                           {Identity, SetInactive, B.getInt32(DPP_WF_SR1),
                            B.getInt32(0xf), B.getInt32(0xf), B.getFalse()});
-    NewV = FirstDPP;
+    ExclScan = FirstDPP;
 
     const unsigned Iters = 7;
     const unsigned DPPCtrl[Iters] = {
@@ -295,21 +365,20 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
     // This loop performs an exclusive scan across the wavefront, with all lanes
     // active (by using the WWM intrinsic).
     for (unsigned Idx = 0; Idx < Iters; Idx++) {
-      Value *const UpdateValue = Idx < 3 ? FirstDPP : NewV;
+      Value *const UpdateValue = Idx < 3 ? FirstDPP : ExclScan;
       CallInst *const DPP = B.CreateIntrinsic(
           Intrinsic::amdgcn_update_dpp, Ty,
           {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]),
            B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()});
 
-      NewV = B.CreateBinOp(Op, NewV, DPP);
+      ExclScan = buildNonAtomicBinOp(B, Op, ExclScan, DPP);
     }
 
-    LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
-    NewV = B.CreateBinOp(Op, SetInactive, NewV);
+    NewV = buildNonAtomicBinOp(B, Op, SetInactive, ExclScan);
 
     // Read the value from the last lane, which has accumlated the values of
-    // each active lane in the wavefront. This will be our new value with which
-    // we will provide to the atomic operation.
+    // each active lane in the wavefront. This will be our new value which we
+    // will provide to the atomic operation.
     if (TyBitWidth == 64) {
       Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty());
       Value *const ExtractHi =
@@ -324,9 +393,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
           B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1));
       NewV = B.CreateBitCast(Insert, Ty);
     } else if (TyBitWidth == 32) {
-      CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane,
-                                                   {}, {NewV, B.getInt32(63)});
-      NewV = ReadLane;
+      NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
+                               {NewV, B.getInt32(63)});
     } else {
       llvm_unreachable("Unhandled atomic bit width");
     }
@@ -334,14 +402,32 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
     // Finally mark the readlanes in the WWM section.
     NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV);
   } else {
-    // Get the total number of active lanes we have by using popcount.
-    Instruction *const Ctpop = B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot);
-    Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false);
-
-    // Calculate the new value we will be contributing to the atomic operation
-    // for the entire wavefront.
-    NewV = B.CreateMul(V, CtpopCast);
-    LaneOffset = B.CreateMul(V, MbcntCast);
+    switch (Op) {
+    default:
+      llvm_unreachable("Unhandled atomic op");
+
+    case AtomicRMWInst::Add:
+    case AtomicRMWInst::Sub: {
+      // Get the total number of active lanes we have by using popcount.
+      Instruction *const Ctpop =
+          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot);
+      Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false);
+
+      // Calculate the new value we will be contributing to the atomic operation
+      // for the entire wavefront.
+      NewV = B.CreateMul(V, CtpopCast);
+      break;
+    }
+
+    case AtomicRMWInst::Max:
+    case AtomicRMWInst::Min:
+    case AtomicRMWInst::UMax:
+    case AtomicRMWInst::UMin:
+      // Max/min with a uniform value is idempotent: doing the atomic operation
+      // multiple times has the same effect as doing it once.
+      NewV = V;
+      break;
+    }
   }
 
   // We only want a single lane to enter our new control flow, and we do this
@@ -407,7 +493,26 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   // get our individual lane's slice into the result. We use the lane offset we
   // previously calculated combined with the atomic result value we got from the
   // first lane, to get our lane's index into the atomic result.
-  Value *const Result = B.CreateBinOp(Op, BroadcastI, LaneOffset);
+  Value *LaneOffset = nullptr;
+  if (ValDivergent) {
+    LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan);
+  } else {
+    switch (Op) {
+    default:
+      llvm_unreachable("Unhandled atomic op");
+    case AtomicRMWInst::Add:
+    case AtomicRMWInst::Sub:
+      LaneOffset = B.CreateMul(V, MbcntCast);
+      break;
+    case AtomicRMWInst::Max:
+    case AtomicRMWInst::Min:
+    case AtomicRMWInst::UMax:
+    case AtomicRMWInst::UMin:
+      LaneOffset = B.CreateSelect(Cond, Identity, V);
+      break;
+    }
+  }
+  Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);
 
   if (IsPixelShader) {
     // Need a final PHI to reconverge to above the helper lane branch mask.
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index f3d50c9c490f9..5f7649c1c0ea5 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -194,3 +194,111 @@ entry:
   store i64 %old, i64 addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: max_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw max i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: max_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_max_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
+entry:
+  %old = atomicrmw max i64 addrspace(3)* @local_var64, i64 5 acq_rel
+  store i64 %old, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: min_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw min i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: min_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_min_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
+entry:
+  %old = atomicrmw min i64 addrspace(3)* @local_var64, i64 5 acq_rel
+  store i64 %old, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: umax_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw umax i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: umax_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_max_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
+entry:
+  %old = atomicrmw umax i64 addrspace(3)* @local_var64, i64 5 acq_rel
+  store i64 %old, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: umin_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw umin i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: umin_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_min_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
+entry:
+  %old = atomicrmw umin i64 addrspace(3)* @local_var64, i64 5 acq_rel
+  store i64 %old, i64 addrspace(1)* %out
+  ret void
+}

From d100b5dd0197df615ac0ffc1619aec796cbdc0be Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Tue, 16 Jul 2019 18:04:26 +0000
Subject: [PATCH 262/451] Teach `llvm-pdbutil pretty -native` about
 `-injected-sources`

`pretty -native -injected-sources -injected-source-content` works with
this patch, and produces identical output to the dia version.

Differential Revision: https://reviews.llvm.org/D64428

llvm-svn: 366236
---
 .../llvm/DebugInfo/PDB/Native/HashTable.h     |  12 +-
 .../PDB/Native/InjectedSourceStream.h         |  44 +++++++
 .../PDB/Native/NativeEnumInjectedSources.h    |  43 +++++++
 .../llvm/DebugInfo/PDB/Native/PDBFile.h       |   6 +
 llvm/lib/DebugInfo/PDB/CMakeLists.txt         |   2 +
 .../PDB/Native/InjectedSourceStream.cpp       |  65 ++++++++++
 .../PDB/Native/NativeEnumInjectedSources.cpp  | 121 ++++++++++++++++++
 .../DebugInfo/PDB/Native/NativeSession.cpp    |  13 +-
 llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp     |  57 +++++++--
 .../llvm-pdbutil/injected-sources-native.test |  30 +++++
 llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp      |   9 +-
 .../secondary/llvm/lib/DebugInfo/PDB/BUILD.gn |   2 +
 12 files changed, 383 insertions(+), 21 deletions(-)
 create mode 100644 llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
 create mode 100644 llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h
 create mode 100644 llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
 create mode 100644 llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
 create mode 100644 llvm/test/tools/llvm-pdbutil/injected-sources-native.test

diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
index e045cc28f71ae..aa38417bcf4c1 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -72,6 +72,12 @@ class HashTableIterator
     assert(Map->Present.test(Index));
     return Map->Buckets[Index];
   }
+
+  // Implement postfix op++ in terms of prefix op++ by using the superclass
+  // implementation.
+  using iterator_facade_base<HashTableIterator<ValueT>,
+                             std::forward_iterator_tag,
+                             const std::pair<uint32_t, ValueT>>::operator++;
   HashTableIterator &operator++() {
     while (Index < Map->Buckets.size()) {
       ++Index;
@@ -94,9 +100,6 @@ class HashTableIterator
 
 template <typename ValueT>
 class HashTable {
-  using const_iterator = HashTableIterator<ValueT>;
-  friend const_iterator;
-
   struct Header {
     support::ulittle32_t Size;
     support::ulittle32_t Capacity;
@@ -105,6 +108,9 @@ class HashTable {
   using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
 
 public:
+  using const_iterator = HashTableIterator<ValueT>;
+  friend const_iterator;
+
   HashTable() { Buckets.resize(8); }
   explicit HashTable(uint32_t Capacity) {
     Buckets.resize(Capacity);
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
new file mode 100644
index 0000000000000..d0cac3749bcab
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h
@@ -0,0 +1,44 @@
+//===- InjectedSourceStream.h - PDB Headerblock Stream Access ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBINJECTEDSOURCESTREAM_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBINJECTEDSOURCESTREAM_H
+
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace msf {
+class MappedBlockStream;
+}
+namespace pdb {
+class PDBFile;
+class PDBStringTable;
+
+class InjectedSourceStream {
+public:
+  InjectedSourceStream(std::unique_ptr<msf::MappedBlockStream> Stream);
+  Error reload(const PDBStringTable &Strings);
+
+  using const_iterator = HashTable<SrcHeaderBlockEntry>::const_iterator;
+  const_iterator begin() const { return InjectedSourceTable.begin(); }
+  const_iterator end() const { return InjectedSourceTable.end(); }
+
+  uint32_t size() const { return InjectedSourceTable.size(); }
+
+private:
+  std::unique_ptr<msf::MappedBlockStream> Stream;
+
+  const SrcHeaderBlockHeader* Header;
+  HashTable<SrcHeaderBlockEntry> InjectedSourceTable;
+};
+}
+}
+
+#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h
new file mode 100644
index 0000000000000..ca1e22bd82a2b
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h
@@ -0,0 +1,43 @@
+//==- NativeEnumInjectedSources.h - Native Injected Source Enumerator ----*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMINJECTEDSOURCES_H
+#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMINJECTEDSOURCES_H
+
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h"
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
+
+namespace llvm {
+namespace pdb {
+
+class InjectedSourceStream;
+class PDBStringTable;
+
+class NativeEnumInjectedSources : public IPDBEnumChildren<IPDBInjectedSource> {
+public:
+  NativeEnumInjectedSources(PDBFile &File, const InjectedSourceStream &IJS,
+                            const PDBStringTable &Strings);
+
+  uint32_t getChildCount() const override;
+  std::unique_ptr<IPDBInjectedSource>
+  getChildAtIndex(uint32_t Index) const override;
+  std::unique_ptr<IPDBInjectedSource> getNext() override;
+  void reset() override;
+
+private:
+  PDBFile &File;
+  const InjectedSourceStream &Stream;
+  const PDBStringTable &Strings;
+  InjectedSourceStream::const_iterator Cur;
+};
+
+} // namespace pdb
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
index 92c1e0fe2fe60..56de4030167de 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h
@@ -32,6 +32,7 @@ namespace pdb {
 class DbiStream;
 class GlobalsStream;
 class InfoStream;
+class InjectedSourceStream;
 class PDBStringTable;
 class PDBFileBuilder;
 class PublicsStream;
@@ -87,6 +88,8 @@ class PDBFile : public msf::IMSFFile {
   createIndexedStream(uint16_t SN) const;
   Expected<std::unique_ptr<msf::MappedBlockStream>>
   safelyCreateIndexedStream(uint32_t StreamIndex) const;
+  Expected<std::unique_ptr<msf::MappedBlockStream>>
+  safelyCreateNamedStream(StringRef Name);
 
   msf::MSFStreamLayout getStreamLayout(uint32_t StreamIdx) const;
   msf::MSFStreamLayout getFpmStreamLayout() const;
@@ -102,6 +105,7 @@ class PDBFile : public msf::IMSFFile {
   Expected<PublicsStream &> getPDBPublicsStream();
   Expected<SymbolStream &> getPDBSymbolStream();
   Expected<PDBStringTable &> getStringTable();
+  Expected<InjectedSourceStream &> getInjectedSourceStream();
 
   BumpPtrAllocator &getAllocator() { return Allocator; }
 
@@ -113,6 +117,7 @@ class PDBFile : public msf::IMSFFile {
   bool hasPDBSymbolStream();
   bool hasPDBTpiStream() const;
   bool hasPDBStringTable();
+  bool hasPDBInjectedSourceStream();
 
   uint32_t getPointerSize();
 
@@ -133,6 +138,7 @@ class PDBFile : public msf::IMSFFile {
   std::unique_ptr<SymbolStream> Symbols;
   std::unique_ptr<msf::MappedBlockStream> DirectoryStream;
   std::unique_ptr<msf::MappedBlockStream> StringTableStream;
+  std::unique_ptr<InjectedSourceStream> InjectedSources;
   std::unique_ptr<PDBStringTable> Strings;
 };
 }
diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt
index d9d379f6d0912..0e842af9f18f2 100644
--- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt
@@ -47,9 +47,11 @@ add_pdb_impl_folder(Native
   Native/HashTable.cpp
   Native/InfoStream.cpp
   Native/InfoStreamBuilder.cpp
+  Native/InjectedSourceStream.cpp
   Native/ModuleDebugStream.cpp
   Native/NativeCompilandSymbol.cpp
   Native/NativeEnumGlobals.cpp
+  Native/NativeEnumInjectedSources.cpp
   Native/NativeEnumModules.cpp
   Native/NativeEnumTypes.cpp
   Native/NativeExeSymbol.cpp
diff --git a/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp b/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
new file mode 100644
index 0000000000000..3f4101db7b93e
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp
@@ -0,0 +1,65 @@
+//===- InjectedSourceStream.cpp - PDB Headerblock Stream Access -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
+
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::support;
+using namespace llvm::pdb;
+
+InjectedSourceStream::InjectedSourceStream(
+    std::unique_ptr<MappedBlockStream> Stream)
+    : Stream(std::move(Stream)) {}
+
+Error InjectedSourceStream::reload(const PDBStringTable &Strings) {
+  BinaryStreamReader Reader(*Stream);
+
+  if (auto EC = Reader.readObject(Header))
+    return EC;
+
+  if (Header->Version !=
+      static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne))
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Invalid headerblock header version");
+
+  if (auto EC = InjectedSourceTable.load(Reader))
+    return EC;
+
+  for (const auto& Entry : *this) {
+    if (Entry.second.Size != sizeof(SrcHeaderBlockEntry))
+      return make_error<RawError>(raw_error_code::corrupt_file,
+                                  "Invalid headerbock entry size");
+    if (Entry.second.Version !=
+        static_cast<uint32_t>(PdbRaw_SrcHeaderBlockVer::SrcVerOne))
+      return make_error<RawError>(raw_error_code::corrupt_file,
+                                  "Invalid headerbock entry version");
+
+    // Check that all name references are valid.
+    auto Name = Strings.getStringForID(Entry.second.FileNI);
+    if (!Name)
+      return Name.takeError();
+    auto ObjName = Strings.getStringForID(Entry.second.ObjNI);
+    if (!ObjName)
+      return ObjName.takeError();
+    auto VName = Strings.getStringForID(Entry.second.VFileNI);
+    if (!VName)
+      return VName.takeError();
+  }
+
+  assert(Reader.bytesRemaining() == 0);
+  return Error::success();
+}
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
new file mode 100644
index 0000000000000..7c7901b708cc8
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -0,0 +1,121 @@
+//==- NativeEnumInjectedSources.cpp - Native Injected Source Enumerator --*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
+
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+
+namespace llvm {
+namespace pdb {
+
+namespace {
+
+Expected<std::string> readStreamData(BinaryStream &Stream) {
+  uint32_t Offset = 0, DataLength = Stream.getLength();
+  std::string Result;
+  Result.reserve(DataLength);
+  while (Offset < DataLength) {
+    ArrayRef<uint8_t> Data;
+    if (auto E = Stream.readLongestContiguousChunk(Offset, Data))
+      return std::move(E);
+    Offset += Data.size();
+    Result += toStringRef(Data);
+  }
+  return Result;
+}
+
+class NativeInjectedSource final : public IPDBInjectedSource {
+  const SrcHeaderBlockEntry &Entry;
+  const PDBStringTable &Strings;
+  PDBFile &File;
+
+public:
+  NativeInjectedSource(const SrcHeaderBlockEntry &Entry,
+                       PDBFile &File, const PDBStringTable &Strings)
+      : Entry(Entry), Strings(Strings), File(File) {}
+
+  uint32_t getCrc32() const override { return Entry.CRC; }
+  uint64_t getCodeByteSize() const override { return Entry.FileSize; }
+
+  std::string getFileName() const override {
+    auto Name = Strings.getStringForID(Entry.FileNI);
+    assert(Name && "InjectedSourceStream should have rejected this");
+    return *Name;
+  }
+
+  std::string getObjectFileName() const override {
+    auto ObjName = Strings.getStringForID(Entry.ObjNI);
+    assert(ObjName && "InjectedSourceStream should have rejected this");
+    return *ObjName;
+  }
+
+  std::string getVirtualFileName() const override {
+    auto VName = Strings.getStringForID(Entry.VFileNI);
+    assert(VName && "InjectedSourceStream should have rejected this");
+    return *VName;
+  }
+
+  PDB_SourceCompression getCompression() const override {
+    return static_cast<PDB_SourceCompression>(Entry.Compression);
+  }
+
+  std::string getCode() const override {
+    // Get name of stream storing the data.
+    auto VName = Strings.getStringForID(Entry.VFileNI);
+    assert(VName && "InjectedSourceStream should have rejected this");
+    std::string StreamName = ("/src/files/" + *VName).str();
+
+    // Find stream with that name and read its data.
+    // FIXME: Consider validating (or even loading) all this in
+    // InjectedSourceStream so that no error can happen here.
+    auto ExpectedFileStream = File.safelyCreateNamedStream(StreamName);
+    if (!ExpectedFileStream) {
+      consumeError(ExpectedFileStream.takeError());
+      return "(failed to open data stream)";
+    }
+
+    auto Data = readStreamData(**ExpectedFileStream);
+    if (!Data) {
+      consumeError(Data.takeError());
+      return "(failed to read data)";
+    }
+    return *Data;
+  }
+};
+
+} // namespace
+
+NativeEnumInjectedSources::NativeEnumInjectedSources(
+    PDBFile &File, const InjectedSourceStream &IJS,
+    const PDBStringTable &Strings)
+    : File(File), Stream(IJS), Strings(Strings), Cur(Stream.begin()) {}
+
+uint32_t NativeEnumInjectedSources::getChildCount() const {
+  return static_cast<uint32_t>(Stream.size());
+}
+
+std::unique_ptr<IPDBInjectedSource>
+NativeEnumInjectedSources::getChildAtIndex(uint32_t N) const {
+  if (N >= getChildCount())
+    return nullptr;
+  return make_unique<NativeInjectedSource>(std::next(Stream.begin(), N)->second,
+                                           File, Strings);
+}
+
+std::unique_ptr<IPDBInjectedSource> NativeEnumInjectedSources::getNext() {
+  if (Cur == Stream.end())
+    return nullptr;
+  return make_unique<NativeInjectedSource>((Cur++)->second, File, Strings);
+}
+
+void NativeEnumInjectedSources::reset() { Cur = Stream.begin(); }
+
+}
+}
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index 5fb2ea3fec5db..8a49cb1c5963e 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -13,6 +13,7 @@
 #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
 #include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
 #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
 #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h"
 #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
 #include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h"
@@ -191,7 +192,17 @@ std::unique_ptr<IPDBEnumTables> NativeSession::getEnumTables() const {
 
 std::unique_ptr<IPDBEnumInjectedSources>
 NativeSession::getInjectedSources() const {
-  return nullptr;
+  auto ISS = Pdb->getInjectedSourceStream();
+  if (!ISS) {
+    consumeError(ISS.takeError());
+    return nullptr;
+  }
+  auto Strings = Pdb->getStringTable();
+  if (!Strings) {
+    consumeError(Strings.takeError());
+    return nullptr;
+  }
+  return make_unique<NativeEnumInjectedSources>(*Pdb, *ISS, *Strings);
 }
 
 std::unique_ptr<IPDBEnumSectionContribs>
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
index f1255d5d67718..983031dfcb78a 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -14,6 +14,7 @@
 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
@@ -365,16 +366,7 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
 
 Expected<PDBStringTable &> PDBFile::getStringTable() {
   if (!Strings) {
-    auto IS = getPDBInfoStream();
-    if (!IS)
-      return IS.takeError();
-
-    Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
-    if (!ExpectedNSI)
-      return ExpectedNSI.takeError();
-    uint32_t NameStreamIndex = *ExpectedNSI;
-
-    auto NS = safelyCreateIndexedStream(NameStreamIndex);
+    auto NS = safelyCreateNamedStream("/names");
     if (!NS)
       return NS.takeError();
 
@@ -389,6 +381,24 @@ Expected<PDBStringTable &> PDBFile::getStringTable() {
   return *Strings;
 }
 
+Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
+  if (InjectedSources)
+    return *InjectedSources; // Already loaded by an earlier call.
+  // First use: open the named stream, then reload() it with the string table.
+  auto RawStream = safelyCreateNamedStream("/src/headerblock");
+  if (!RawStream)
+    return RawStream.takeError();
+  auto StringTable = getStringTable();
+  if (!StringTable)
+    return StringTable.takeError();
+  auto Loaded = llvm::make_unique<InjectedSourceStream>(std::move(*RawStream));
+  if (auto Err = Loaded->reload(*StringTable))
+    return std::move(Err);
+  // Cache only after reload() succeeded, so a failure is retried next time.
+  InjectedSources = std::move(Loaded);
+  return *InjectedSources;
+}
+
 uint32_t PDBFile::getPointerSize() {
   auto DbiS = getPDBDbiStream();
   if (!DbiS)
@@ -457,6 +467,25 @@ bool PDBFile::hasPDBStringTable() {
   return true;
 }
 
+/// Returns true if the PDB info stream has a named-stream entry for
+/// "/src/headerblock". Any lookup error is consumed and reported as false.
+bool PDBFile::hasPDBInjectedSourceStream() {
+  auto IS = getPDBInfoStream();
+  if (!IS) {
+    // A failed Expected must have its Error taken before it is destroyed;
+    // otherwise builds with LLVM_ENABLE_ABI_BREAKING_CHECKS abort here.
+    consumeError(IS.takeError());
+    return false;
+  }
+  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
+  if (!ExpectedNSI) {
+    consumeError(ExpectedNSI.takeError());
+    return false;
+  }
+  assert(*ExpectedNSI < getNumStreams());
+  return true;
+}
+
 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
 /// stream with that index actually exists.  If it does not, the return value
 /// will have an MSFError with code msf_error_code::no_stream.  Else, the return
@@ -468,3 +497,17 @@ PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
     return make_error<RawError>(raw_error_code::no_stream);
   return createIndexedStream(StreamIndex);
 }
+
+Expected<std::unique_ptr<MappedBlockStream>>
+PDBFile::safelyCreateNamedStream(StringRef Name) {
+  // Named streams are looked up through the PDB info stream.
+  auto Info = getPDBInfoStream();
+  if (!Info)
+    return Info.takeError();
+
+  // Translate the name to a stream index; a failed lookup goes to the caller.
+  Expected<uint32_t> Index = Info->getNamedStreamIndex(Name);
+  if (!Index)
+    return Index.takeError();
+  return safelyCreateIndexedStream(*Index);
+}
diff --git a/llvm/test/tools/llvm-pdbutil/injected-sources-native.test b/llvm/test/tools/llvm-pdbutil/injected-sources-native.test
new file mode 100644
index 0000000000000..374f14fc32102
--- /dev/null
+++ b/llvm/test/tools/llvm-pdbutil/injected-sources-native.test
@@ -0,0 +1,30 @@
+; This is identical to injected-sources.test, except that it uses the -native
+; mode of pretty (and hence doesn't require diasdk and runs on all platforms).
+
+; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
+; RUN:   %p/Inputs/InjectedSource.pdb | FileCheck %s
+; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
+; RUN:   %p/Inputs/ClassLayoutTest.pdb | FileCheck --check-prefix=NEGATIVE %s
+
+; CHECK:      ---INJECTED SOURCES---
+; CHECK:      c.natvis (140 bytes): obj=<null>, vname=c.natvis, crc=334478030, compression=None
+; CHECK-NEXT: <?xml version="1.0" encoding="utf-8"?>
+; CHECK-NEXT: <AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+; CHECK-NEXT: </AutoVisualizer>
+; CHECK:      a.natvis (140 bytes): obj=<null>, vname=a.natvis, crc=334478030, compression=None
+; CHECK-NEXT: <?xml version="1.0" encoding="utf-8"?>
+; CHECK-NEXT: <AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+; CHECK-NEXT: </AutoVisualizer>
+; CHECK:      b.natvis (294 bytes): obj=<null>, vname=b.natvis, crc=2059731902, compression=None
+; CHECK-NEXT: <?xml version="1.0" encoding="utf-8"?>
+; CHECK-NEXT: <AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
+; CHECK-NEXT: <Type Name="Baz">
+; CHECK-NEXT:   <DisplayString>Third test</DisplayString>
+; CHECK-NEXT: </Type>
+; CHECK-NEXT: <Type Name="Buzz">
+; CHECK-NEXT:   <DisplayString>Fourth test</DisplayString>
+; CHECK-NEXT: </Type>
+; CHECK-NEXT: </AutoVisualizer>
+
+; NEGATIVE:      ---INJECTED SOURCES---
+; NEGATIVE-NEXT: There are no injected sources.
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index a19257af38d65..e6e89d4bf2201 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -934,7 +934,7 @@ static std::string stringOr(std::string Str, std::string IfEmpty) {
 
 static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
   auto Sources = Session.getInjectedSources();
-  if (0 == Sources->getChildCount()) {
+  if (!Sources || !Sources->getChildCount()) {
     Printer.printLine("There are no injected sources.");
     return;
   }
@@ -1279,12 +1279,7 @@ static void dumpPretty(StringRef Path) {
     WithColor(Printer, PDB_ColorItem::SectionHeader).get()
         << "---INJECTED SOURCES---";
     AutoIndent Indent1(Printer);
-
-    if (ReaderType == PDB_ReaderType::Native)
-      Printer.printLine(
-          "Injected sources are not supported with the native reader.");
-    else
-      dumpInjectedSources(Printer, *Session);
+    dumpInjectedSources(Printer, *Session);
   }
 
   Printer.NewLine();
diff --git a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn
index 7b8adb3b49a2a..d38b2bb214cc0 100644
--- a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn
@@ -24,10 +24,12 @@ static_library("PDB") {
     "Native/HashTable.cpp",
     "Native/InfoStream.cpp",
     "Native/InfoStreamBuilder.cpp",
+    "Native/InjectedSourceStream.cpp",
     "Native/ModuleDebugStream.cpp",
     "Native/NamedStreamMap.cpp",
     "Native/NativeCompilandSymbol.cpp",
     "Native/NativeEnumGlobals.cpp",
+    "Native/NativeEnumInjectedSources.cpp",
     "Native/NativeEnumModules.cpp",
     "Native/NativeEnumTypes.cpp",
     "Native/NativeExeSymbol.cpp",

From 35c96598b1246ea038677d7c4580f3c758ff1d93 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 18:05:29 +0000
Subject: [PATCH 263/451] AMDGPU/GlobalISel: Select flat loads

Now that the patterns use the new PatFrag address space support, the
only blocker to importing most load patterns is the addressing mode
complex patterns.

llvm-svn: 366237
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td         |    8 +
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   17 +-
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |   96 +-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |    9 +
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |    3 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |   19 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |    6 +
 .../GlobalISel/inst-select-load-flat.mir      | 1709 ++++++++++++++++-
 .../GlobalISel/inst-select-load-global.mir    | 1657 ++++++++++++++++
 9 files changed, 3459 insertions(+), 65 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 6f725d609072b..1ccb90b2587ed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -50,6 +50,14 @@ def gi_smrd_sgpr :
     GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
     GIComplexPatternEquiv<SMRDSgpr>;
 
+def gi_flat_offset :
+    GIComplexOperandMatcher<s64, "selectFlatOffset">,
+    GIComplexPatternEquiv<FLATOffset>;
+def gi_flat_offset_signed :
+    GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
+    GIComplexPatternEquiv<FLATOffsetSigned>;
+
+
 class GISelSop2Pat <
   SDPatternOperator node,
   Instruction inst,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 14ae62968c65b..39016ed371935 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2937,18 +2937,11 @@ bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned,
     SDValue N1 = Addr.getOperand(1);
     int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
 
-    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
-      if ((IsSigned && isInt<12>(COffsetVal)) ||
-          (!IsSigned && isUInt<11>(COffsetVal))) {
-        Addr = N0;
-        OffsetVal = COffsetVal;
-      }
-    } else {
-      if ((IsSigned && isInt<13>(COffsetVal)) ||
-          (!IsSigned && isUInt<12>(COffsetVal))) {
-        Addr = N0;
-        OffsetVal = COffsetVal;
-      }
+    const SIInstrInfo *TII = ST.getInstrInfo();
+    if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(),
+                               IsSigned)) {
+      Addr = N0;
+      OffsetVal = COffsetVal;
     }
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index aa634e881d870..f8f89593d0805 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1239,47 +1239,9 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
 }
 
 bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
-  MachineBasicBlock *BB = I.getParent();
-  MachineFunction *MF = BB->getParent();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  const DebugLoc &DL = I.getDebugLoc();
-  Register DstReg = I.getOperand(0).getReg();
-  Register PtrReg = I.getOperand(1).getReg();
-  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
-  unsigned Opcode;
-
-  if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) {
-    LLVM_DEBUG(dbgs() << "Unhandled address space\n");
-    return false;
-  }
-
-  SmallVector<GEPInfo, 4> AddrInfo;
-
-  getAddrModeInfo(I, MRI, AddrInfo);
-
-  switch (LoadSize) {
-  case 32:
-    Opcode = AMDGPU::FLAT_LOAD_DWORD;
-    break;
-  case 64:
-    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
-    break;
-  default:
-    LLVM_DEBUG(dbgs() << "Unhandled load size\n");
-    return false;
-  }
-
-  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
-                               .add(I.getOperand(0))
-                               .addReg(PtrReg)
-                               .addImm(0)  // offset
-                               .addImm(0)  // glc
-                               .addImm(0)  // slc
-                               .addImm(0); // dlc
-
-  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
-  I.eraseFromParent();
-  return Ret;
+  // TODO: Can/should we insert m0 initialization here for DS instructions and
+  // call the normal selector?
+  return false;
 }
 
 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
@@ -1397,9 +1359,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
       return true;
     return selectImpl(I, CoverageInfo);
   case TargetOpcode::G_LOAD:
-    if (selectImpl(I, CoverageInfo))
-      return true;
-    return selectG_LOAD(I);
+    return selectImpl(I, CoverageInfo);
   case TargetOpcode::G_SELECT:
     return selectG_SELECT(I);
   case TargetOpcode::G_STORE:
@@ -1584,3 +1544,51 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
     [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
   }};
 }
+
+template <bool Signed>
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  // Try to fold a constant G_GEP offset into the instruction's offset operand.
+  InstructionSelector::ComplexRendererFns Default = {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },  // offset
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
+    }};
+  // Every bail-out below renders Default: the root register and a zero offset.
+  if (!STI.hasFlatInstOffsets())
+    return Default;
+  // Only an address defined by a G_GEP is a folding candidate.
+  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
+  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
+    return Default;
+
+  Optional<int64_t> Offset =
+    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
+  if (!Offset.hasValue())
+    return Default;
+  // Legality is checked for the address space of the first memory operand.
+  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
+  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
+    return Default;
+
+  Register BasePtr = OpDef->getOperand(1).getReg();
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
+    }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
+  return selectFlatOffsetImpl<false>(Root); // Offset checked as unsigned.
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
+  return selectFlatOffsetImpl<true>(Root); // Offset checked as signed.
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 1027a0b5683d3..e30d745f5cb64 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -119,6 +119,15 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   InstructionSelector::ComplexRendererFns
   selectSmrdSgpr(MachineOperand &Root) const;
 
+  template <bool Signed>
+  InstructionSelector::ComplexRendererFns
+  selectFlatOffsetImpl(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectFlatOffset(MachineOperand &Root) const;
+
+  InstructionSelector::ComplexRendererFns
+  selectFlatOffsetSigned(MachineOperand &Root) const;
+
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 3cf4fbc752493..670f6225fbf78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -497,6 +497,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
       .custom();
   }
 
+  // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
+  // handle some operations by just promoting the register during
+  // selection. There are also d16 loads on GFX9+ which preserve the high bits.
   getActionDefinitionsBuilder({G_LOAD, G_STORE})
     .narrowScalarIf([](const LegalityQuery &Query) {
         unsigned Size = Query.Types[0].getSizeInBits();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 34741850f82fb..ba8ed6993a560 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6118,6 +6118,25 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
   return RCID == AMDGPU::SReg_128RegClassID;
 }
 
+bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
+                                    bool Signed) const {
+  // TODO: Should 0 be special cased?
+  // Reject when the subtarget has no FLAT instruction offsets, or when its
+  // flat segment offset bug applies to an access in the flat address space.
+  if (!ST.hasFlatInstOffsets() ||
+      (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS))
+    return false;
+
+  // GFX10 and newer accept a 12-bit signed or 11-bit unsigned offset; older
+  // generations accept one bit more in each case.
+  if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
+    return Signed ? isInt<12>(Offset) : isUInt<11>(Offset);
+
+  // Before GFX10: 13-bit signed or 12-bit unsigned.
+  return Signed ? isInt<13>(Offset) : isUInt<12>(Offset);
+}
+
+
 // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
 enum SIEncodingFamily {
   SI = 0,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1f3c659f9d9ca..3ff35da0b9630 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -970,6 +970,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return isUInt<12>(Imm);
   }
 
+  /// Returns true if \p Offset is legal for the subtarget as the offset to a
+  /// FLAT encoded instruction accessing \p AddrSpace. If \p Signed, this is
+  /// for an instruction that interprets the offset as signed.
+  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
+                         bool Signed) const;
+
   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
   /// Return -1 if the target-specific opcode for the pseudo instruction does
   /// not exist. If Opcode is not a pseudo instruction, this is identity.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index 8069dff2634f3..f579c3ce28767 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -1,26 +1,1717 @@
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+
+
+---
+
+name: load_flat_s32_from_4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_4
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ; GFX8-LABEL: name: load_flat_s32_from_4
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ; GFX9-LABEL: name: load_flat_s32_from_4
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ; GFX10-LABEL: name: load_flat_s32_from_4
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_flat_s32_from_2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_2
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
+    ; GFX8-LABEL: name: load_flat_s32_from_2
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
+    ; GFX9-LABEL: name: load_flat_s32_from_2
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
+    ; GFX10-LABEL: name: load_flat_s32_from_2
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 0)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_flat_s32_from_1
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_flat_v2s32
+legalized:       true
+regBankSelected: true
+
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v2s32
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    ; GFX8-LABEL: name: load_flat_v2s32
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    ; GFX9-LABEL: name: load_flat_v2s32
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    ; GFX10-LABEL: name: load_flat_v2s32
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_flat_v3s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v3s32
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
+    ; GFX8-LABEL: name: load_flat_v3s32
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
+    ; GFX9-LABEL: name: load_flat_v3s32
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
+    ; GFX10-LABEL: name: load_flat_v3s32
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<3 x  s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0)
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+
+name: load_flat_v4s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v4s32
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX8-LABEL: name: load_flat_v4s32
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX9-LABEL: name: load_flat_v4s32
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX10-LABEL: name: load_flat_v4s32
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x  s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_flat_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s64
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX8-LABEL: name: load_flat_s64
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-LABEL: name: load_flat_s64
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-LABEL: name: load_flat_s64
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_flat_v2s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v2s64
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX8-LABEL: name: load_flat_v2s64
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-LABEL: name: load_flat_v2s64
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX10-LABEL: name: load_flat_v2s64
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+# 16-byte, align-4 load of <2 x p1>. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_128 class.
+name: load_flat_v2p1
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v2p1
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX8-LABEL: name: load_flat_v2p1
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX9-LABEL: name: load_flat_v2p1
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX10-LABEL: name: load_flat_v2p1
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+# 12-byte, align-4 load of s96. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_96 class.
+name: load_flat_s96
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s96
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+    ; GFX8-LABEL: name: load_flat_s96
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+    ; GFX9-LABEL: name: load_flat_s96
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+    ; GFX10-LABEL: name: load_flat_s96
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 0)
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+# 16-byte, align-4 load of s128. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_128 class.
+name: load_flat_s128
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s128
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX8-LABEL: name: load_flat_s128
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9-LABEL: name: load_flat_s128
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX10-LABEL: name: load_flat_s128
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+# 4-byte load producing a p3 value. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vgpr_32 class.
+name: load_flat_p3_from_4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_p3_from_4
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4)
+    ; GFX7: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX8-LABEL: name: load_flat_p3_from_4
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4)
+    ; GFX8: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-LABEL: name: load_flat_p3_from_4
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX10-LABEL: name: load_flat_p3_from_4
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4)
+    ; GFX10: $vgpr0 = COPY [[LOAD]](p3)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
+    $vgpr0 = COPY %1
+
+...
+
+---
+# 8-byte load producing a p1 value. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_64 class.
+name: load_flat_p1_from_8
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_p1_from_8
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX8-LABEL: name: load_flat_p1_from_8
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-LABEL: name: load_flat_p1_from_8
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX10-LABEL: name: load_flat_p1_from_8
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+# 8-byte load producing a p999 value. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_64 class.
+name: load_flat_p999_from_8
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_p999_from_8
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX8-LABEL: name: load_flat_p999_from_8
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX9-LABEL: name: load_flat_p999_from_8
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX10-LABEL: name: load_flat_p999_from_8
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+# 8-byte load of <2 x p3>. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_64 class.
+name: load_flat_v2p3
+legalized:       true
+regBankSelected: true
+# NOTE(review): tracksRegLiveness is not set here, unlike the neighbouring tests, and the checks below have no liveins line - confirm this is intentional.
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v2p3
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX8-LABEL: name: load_flat_v2p3
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-LABEL: name: load_flat_v2p3
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX10-LABEL: name: load_flat_v2p3
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+# 4-byte load of <2 x s16>. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vgpr_32 class.
+name: load_flat_v2s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v2s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4)
+    ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX8-LABEL: name: load_flat_v2s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4)
+    ; GFX8: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-LABEL: name: load_flat_v2s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX10-LABEL: name: load_flat_v2s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4)
+    ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
+    $vgpr0 = COPY %1
+
+...
+
+---
+# 8-byte load of <4 x s16>. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_64 class.
+name: load_flat_v4s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v4s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX8-LABEL: name: load_flat_v4s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-LABEL: name: load_flat_v4s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX10-LABEL: name: load_flat_v4s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+# 12-byte, align-4 load of <6 x s16>. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_96 class.
+name: load_flat_v6s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v6s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
+    ; GFX8-LABEL: name: load_flat_v6s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
+    ; GFX9-LABEL: name: load_flat_v6s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
+    ; GFX10-LABEL: name: load_flat_v6s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<6 x  s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 0)
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+# 16-byte, align-4 load of <8 x s16>. All four check prefixes expect the G_LOAD to remain a generic instruction; only its result is given the vreg_128 class.
+name: load_flat_v8s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_v8s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX8-LABEL: name: load_flat_v8s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-LABEL: name: load_flat_v8s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX10-LABEL: name: load_flat_v8s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<8 x  s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+################################################################################
+### Stress addressing modes
+################################################################################
+
+---
+# 1-byte load into s32 from pointer + constant 2047 (G_GEP). The GFX7, GFX8 and GFX10 checks expect the constant built with V_MOV_B32 and added via V_ADD_I32_e64/V_ADDC_U32_e64 ahead of a FLAT_LOAD_UBYTE with a 0 immediate; the GFX9 checks expect 2047 as an immediate operand of the FLAT_LOAD_UBYTE instead.
+name: load_flat_s32_from_1_gep_2047
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2047
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 2047
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+# 1-byte load into s32 from pointer + constant 2048 (G_GEP). The GFX7, GFX8 and GFX10 checks expect the constant built with V_MOV_B32 and added via V_ADD_I32_e64/V_ADDC_U32_e64 ahead of a FLAT_LOAD_UBYTE with a 0 immediate; the GFX9 checks expect 2048 as an immediate operand of the FLAT_LOAD_UBYTE instead.
+name: load_flat_s32_from_1_gep_2048
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2048
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 2048
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+# 1-byte load into s32 from pointer + constant -2047 (G_GEP). All four check prefixes expect the constant built as the 32-bit halves 4294965249 and -1 and added via V_ADD_I32_e64/V_ADDC_U32_e64 ahead of a FLAT_LOAD_UBYTE with a 0 immediate.
+name: load_flat_s32_from_1_gep_m2047
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2047
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -2047
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_flat_s32_from_1_gep_m2048
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2048
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -2048
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_flat_s32_from_1_gep_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4095
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 4095
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_flat_s32_from_1_gep_4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4096
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 4096
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_flat_s32_from_1_gep_m4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4095
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -4095
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_flat_s32_from_1_gep_m4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4096
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -4096
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_flat_s32_from_1_gep_8191
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8191
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 8191
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
 
---- |
-  define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
 ...
+
 ---
 
-name:            global_addrspace
+name: load_flat_s32_from_1_gep_8192
 legalized:       true
 regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8192
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 8192
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
 
-# GCN: global_addrspace
-# GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-# GCN: FLAT_LOAD_DWORD  [[PTR]], 0, 0, 0, 0
+...
+
+---
+
+name: load_flat_s32_from_1_gep_m8191
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
 
 body: |
   bb.0:
     liveins:  $vgpr0_vgpr1
 
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8191
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
-    %1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0)
+    %1:vgpr(s64) = G_CONSTANT i64 -8191
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_flat_s32_from_1_gep_m8192
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8192
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
+    ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -8192
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_atomic_flat_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_atomic_flat_s32
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4)
+    ; GFX7: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX8-LABEL: name: load_atomic_flat_s32
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4)
+    ; GFX8: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-LABEL: name: load_atomic_flat_s32
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-LABEL: name: load_atomic_flat_s32
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4)
+    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = G_LOAD %0 :: (load monotonic  4, align 4, addrspace 0)
     $vgpr0 = COPY %1
 
 ...
+
 ---
+
+name: load_atomic_flat_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_atomic_flat_s64
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX8-LABEL: name: load_atomic_flat_s64
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-LABEL: name: load_atomic_flat_s64
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-LABEL: name: load_atomic_flat_s64
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 0)
+    $vgpr0_vgpr1 = COPY %1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
new file mode 100644
index 0000000000000..df86d18c3b335
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
@@ -0,0 +1,1657 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+
+# FIXME: global with MUBUF
+
+---
+
+name: load_global_s32_from_4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_4
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ; GFX8-LABEL: name: load_global_s32_from_4
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
+    ; GFX9-LABEL: name: load_global_s32_from_4
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+    ; GFX10-LABEL: name: load_global_s32_from_4
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_s32_from_2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_2
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
+    ; GFX8-LABEL: name: load_global_s32_from_2
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
+    ; GFX9-LABEL: name: load_global_s32_from_2
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
+    ; GFX10-LABEL: name: load_global_s32_from_2
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 1)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_s32_from_1
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_v2s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v2s32
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    ; GFX8-LABEL: name: load_global_v2s32
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
+    ; GFX9-LABEL: name: load_global_v2s32
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+    ; GFX10-LABEL: name: load_global_v2s32
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_v3s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v3s32
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
+    ; GFX8-LABEL: name: load_global_v3s32
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
+    ; GFX9-LABEL: name: load_global_v3s32
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]]
+    ; GFX10-LABEL: name: load_global_v3s32
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<3 x  s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+
+name: load_global_v4s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v4s32
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX8-LABEL: name: load_global_v4s32
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
+    ; GFX9-LABEL: name: load_global_v4s32
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    ; GFX10-LABEL: name: load_global_v4s32
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x  s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_global_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s64
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX8-LABEL: name: load_global_s64
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-LABEL: name: load_global_s64
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-LABEL: name: load_global_s64
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_v2s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v2s64
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX8-LABEL: name: load_global_v2s64
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-LABEL: name: load_global_v2s64
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX10-LABEL: name: load_global_v2s64
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_global_v2p1
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v2p1
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX8-LABEL: name: load_global_v2p1
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX9-LABEL: name: load_global_v2p1
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX10-LABEL: name: load_global_v2p1
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_global_s96
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s96
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+    ; GFX8-LABEL: name: load_global_s96
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+    ; GFX9-LABEL: name: load_global_s96
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+    ; GFX10-LABEL: name: load_global_s96
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 1)
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+
+name: load_global_s128
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s128
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX8-LABEL: name: load_global_s128
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9-LABEL: name: load_global_s128
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX10-LABEL: name: load_global_s128
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_global_p3_from_4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_p3_from_4
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX8-LABEL: name: load_global_p3_from_4
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-LABEL: name: load_global_p3_from_4
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX10-LABEL: name: load_global_p3_from_4
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[LOAD]](p3)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_p1_from_8
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_p1_from_8
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX8-LABEL: name: load_global_p1_from_8
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX9-LABEL: name: load_global_p1_from_8
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    ; GFX10-LABEL: name: load_global_p1_from_8
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_p999_from_8
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_p999_from_8
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX8-LABEL: name: load_global_p999_from_8
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX9-LABEL: name: load_global_p999_from_8
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX10-LABEL: name: load_global_p999_from_8
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_v2p3
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v2p3
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX8-LABEL: name: load_global_v2p3
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-LABEL: name: load_global_v2p3
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX10-LABEL: name: load_global_v2p3
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_v2s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v2s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX8-LABEL: name: load_global_v2s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-LABEL: name: load_global_v2s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX10-LABEL: name: load_global_v2s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_global_v4s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v4s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX8-LABEL: name: load_global_v4s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-LABEL: name: load_global_v4s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX10-LABEL: name: load_global_v4s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_global_v6s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v6s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
+    ; GFX8-LABEL: name: load_global_v6s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
+    ; GFX9-LABEL: name: load_global_v6s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
+    ; GFX10-LABEL: name: load_global_v6s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<6 x  s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)
+    $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+
+name: load_global_v8s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_v8s16
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX8-LABEL: name: load_global_v8s16
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-LABEL: name: load_global_v8s16
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX10-LABEL: name: load_global_v8s16
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<8 x  s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+################################################################################
+### Stress addressing modes
+################################################################################
+
+---
+
+name: load_global_s32_from_1_gep_2047
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 2047
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_2048
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 2048
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_m2047
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -2047
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_m2048
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -2048
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 4095
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 4096
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_m4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -4095
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_m4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -4096
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_8191
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 8191
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_8192
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 8192
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_m8191
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -8191
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_global_s32_from_1_gep_m8192
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
+    ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    ; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_CONSTANT i64 -8192
+    %2:vgpr(p1) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_atomic_global_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_atomic_global_s32
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX8-LABEL: name: load_atomic_global_s32
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1)
+    ; GFX8: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX9-LABEL: name: load_atomic_global_s32
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](s32)
+    ; GFX10-LABEL: name: load_atomic_global_s32
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1)
+    ; GFX10: $vgpr0 = COPY [[LOAD]](s32)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = G_LOAD %0 :: (load monotonic  4, align 4, addrspace 1)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_atomic_global_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1
+
+    ; GFX7-LABEL: name: load_atomic_global_s64
+    ; GFX7: liveins: $vgpr0_vgpr1
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX8-LABEL: name: load_atomic_global_s64
+    ; GFX8: liveins: $vgpr0_vgpr1
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1)
+    ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-LABEL: name: load_atomic_global_s64
+    ; GFX9: liveins: $vgpr0_vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX10-LABEL: name: load_atomic_global_s64
+    ; GFX10: liveins: $vgpr0_vgpr1
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1)
+    ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 1)
+    $vgpr0_vgpr1 = COPY %1
+
+...

From fe44a531e0e2aba07213442b1930369316a112b0 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 16 Jul 2019 18:17:33 +0000
Subject: [PATCH 264/451] [COFF] Implement /safeseh:no and check @feat.00 flags
 by default

Summary:
Fixes PR41828. Before this, LLD always emitted SafeSEH chunks and
defined __safe_se_handler_table & size. Now, /safeseh:no leaves those
undefined.

Additionally, we were checking for the safeseh @feat.00 flag in two
places: once to emit errors, and once during safeseh table construction.
The error was set up to be off by default, but safeseh is supposed to be
on by default. I combined the two checks, so now LLD emits an error if
an input object lacks @feat.00 and safeseh is enabled. This caused the
majority of 32-bit LLD tests to fail, since many test input object files
lack @feat.00 symbols. I explicitly added -safeseh:no to those tests to
preserve behavior.

Finally, LLD no longer sets IMAGE_DLL_CHARACTERISTICS_NO_SEH if any
input file wasn't compiled for safeseh.

Reviewers: mstorsjo, ruiu, thakis

Reviewed By: ruiu, thakis

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63570

llvm-svn: 366238
---
 lld/COFF/Config.h                           |  1 +
 lld/COFF/Driver.cpp                         | 14 ++----
 lld/COFF/Writer.cpp                         | 16 ++----
 lld/test/COFF/allow-unknown-debug-info.test |  2 +-
 lld/test/COFF/constant.test                 |  4 +-
 lld/test/COFF/def-export-stdcall.s          |  4 +-
 lld/test/COFF/delayimports32.test           |  2 +-
 lld/test/COFF/dllexport.s                   |  2 +-
 lld/test/COFF/entry-drectve.test            |  2 +-
 lld/test/COFF/entry-inference332.test       |  4 +-
 lld/test/COFF/exclude-all.s                 |  2 +-
 lld/test/COFF/export-all.s                  |  6 +--
 lld/test/COFF/export-stdcall.s              |  2 +-
 lld/test/COFF/export32.test                 | 18 +++----
 lld/test/COFF/fixed.test                    |  8 +--
 lld/test/COFF/gfids-relocations32.s         |  2 +-
 lld/test/COFF/hello32.test                  |  5 +-
 lld/test/COFF/largeaddressaware.test        |  2 +-
 lld/test/COFF/loadcfg32.test                |  2 +-
 lld/test/COFF/locally-imported32.test       |  2 +-
 lld/test/COFF/machine.test                  |  8 +--
 lld/test/COFF/no-ipi-stream.test            |  2 +-
 lld/test/COFF/order-i386.test               |  4 +-
 lld/test/COFF/pdb-debug-f.s                 |  2 +-
 lld/test/COFF/pdb-lib.s                     |  2 +-
 lld/test/COFF/pdb-safeseh.yaml              |  2 +-
 lld/test/COFF/pdb-unknown-subsection.s      |  2 +-
 lld/test/COFF/reloc-x86.test                |  2 +-
 lld/test/COFF/safeseh-no.s                  | 56 +++++++++++++++++++++
 lld/test/COFF/subsystem-drectve.test        |  2 +-
 lld/test/COFF/subsystem-inference32.test    |  8 +--
 lld/test/COFF/tls32.test                    |  2 +-
 32 files changed, 118 insertions(+), 74 deletions(-)
 create mode 100644 lld/test/COFF/safeseh-no.s

diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h
index e378b6fc72484..1b0e240427103 100644
--- a/lld/COFF/Config.h
+++ b/lld/COFF/Config.h
@@ -132,6 +132,7 @@ struct Configuration {
   GuardCFLevel guardCF = GuardCFLevel::Off;
 
   // Used for SafeSEH.
+  bool safeSEH = false;
   Symbol *sehTable = nullptr;
   Symbol *sehCount = nullptr;
 
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 6cfd83ab96b6c..d7af50b9318fc 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1556,6 +1556,11 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   }
   config->wordsize = config->is64() ? 8 : 4;
 
+  // Handle /safeseh, x86 only, on by default, except for mingw.
+  if (config->machine == I386 &&
+      args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw))
+    config->safeSEH = true;
+
   // Handle /functionpadmin
   for (auto *arg : args.filtered(OPT_functionpadmin, OPT_functionpadmin_opt))
     parseFunctionPadMin(arg, config->machine);
@@ -1795,15 +1800,6 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
   if (errorCount())
     return;
 
-  // Handle /safeseh.
-  if (args.hasFlag(OPT_safeseh, OPT_safeseh_no, false)) {
-    for (ObjFile *file : ObjFile::instances)
-      if (!file->hasSafeSEH())
-        error("/safeseh: " + file->getName() + " is not compatible with SEH");
-    if (errorCount())
-      return;
-  }
-
   if (config->mingw) {
     // In MinGW, all symbols are automatically exported if no symbols
     // are chosen to be exported.
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index e4b35a5f8beb9..36ef87de4263e 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -917,7 +917,7 @@ void Writer::createMiscChunks() {
   }
 
   // Create SEH table. x86-only.
-  if (config->machine == I386)
+  if (config->safeSEH)
     createSEHTable();
 
   // Create /guard:cf tables if requested.
@@ -1428,23 +1428,15 @@ void Writer::openFile(StringRef path) {
 }
 
 void Writer::createSEHTable() {
-  // Set the no SEH characteristic on x86 binaries unless we find exception
-  // handlers.
-  setNoSEHCharacteristic = true;
-
   SymbolRVASet handlers;
   for (ObjFile *file : ObjFile::instances) {
-    // FIXME: We should error here instead of earlier unless /safeseh:no was
-    // passed.
     if (!file->hasSafeSEH())
-      return;
-
+      error("/safeseh: " + file->getName() + " is not compatible with SEH");
     markSymbolsForRVATable(file, file->getSXDataChunks(), handlers);
   }
 
-  // Remove the "no SEH" characteristic if all object files were built with
-  // safeseh, we found some exception handlers, and there is a load config in
-  // the object.
+  // Set the "no SEH" characteristic if there really were no handlers, or if
+  // there is no load config object to point to the table of handlers.
   setNoSEHCharacteristic =
       handlers.empty() || !symtab->findUnderscore("_load_config_used");
 
diff --git a/lld/test/COFF/allow-unknown-debug-info.test b/lld/test/COFF/allow-unknown-debug-info.test
index c45b98e2ac292..1cc9e9e0b9e27 100644
--- a/lld/test/COFF/allow-unknown-debug-info.test
+++ b/lld/test/COFF/allow-unknown-debug-info.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj %s > %t.obj
-# RUN: lld-link /dll /noentry /debug %t.obj 2>&1 | FileCheck %s
+# RUN: lld-link -safeseh:no /dll /noentry /debug %t.obj 2>&1 | FileCheck %s
 
 # CHECK: ignoring section .debug$S with unrecognized magic 0x1
 
diff --git a/lld/test/COFF/constant.test b/lld/test/COFF/constant.test
index 02d6b3e2ccae7..dc97f1cb9a38f 100644
--- a/lld/test/COFF/constant.test
+++ b/lld/test/COFF/constant.test
@@ -2,5 +2,5 @@ REQUIRES: x86
 RUN: mkdir -p %t
 RUN: llvm-mc -triple i686-unknown-windows-msvc -filetype obj -o %t/import.o %S/Inputs/constant-import.s
 RUN: llc -mtriple i686-unknown-windows-msvc -filetype obj -o %t/export.o %S/Inputs/constant-export.ll
-RUN: lld-link -machine:x86 -dll -out:%t/export.dll %t/export.o -entry:__CFConstantStringClassReference
-RUN: lld-link -machine:x86 -dll -out:%t/import.dll %t/import.o %t/export.lib
+RUN: lld-link -safeseh:no -machine:x86 -dll -out:%t/export.dll %t/export.o -entry:__CFConstantStringClassReference
+RUN: lld-link -safeseh:no -machine:x86 -dll -out:%t/import.dll %t/import.o %t/export.lib
diff --git a/lld/test/COFF/def-export-stdcall.s b/lld/test/COFF/def-export-stdcall.s
index 55709f95843c4..f015e205c74a3 100644
--- a/lld/test/COFF/def-export-stdcall.s
+++ b/lld/test/COFF/def-export-stdcall.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=i686-windows-msvc %s -o %t.obj
 # RUN: echo -e "LIBRARY foo\nEXPORTS\n  stdcall\n  fastcall\n  vectorcall\n  _underscored" > %t.def
-# RUN: lld-link -entry:dllmain -dll -def:%t.def %t.obj -out:%t.dll -implib:%t.lib
+# RUN: lld-link -safeseh:no -entry:dllmain -dll -def:%t.def %t.obj -out:%t.dll -implib:%t.lib
 # RUN: llvm-readobj %t.lib | FileCheck -check-prefix UNDECORATED-IMPLIB %s
 # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix UNDECORATED-EXPORTS %s
 
@@ -25,7 +25,7 @@
 
 
 # RUN: echo -e "LIBRARY foo\nEXPORTS\n  _stdcall@8\n  @fastcall@8\n  vectorcall@@8" > %t.def
-# RUN: lld-link -entry:dllmain -dll -def:%t.def %t.obj -out:%t.dll -implib:%t.lib
+# RUN: lld-link -safeseh:no -entry:dllmain -dll -def:%t.def %t.obj -out:%t.dll -implib:%t.lib
 # RUN: llvm-readobj %t.lib | FileCheck -check-prefix DECORATED-IMPLIB %s
 # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix DECORATED-EXPORTS %s
 
diff --git a/lld/test/COFF/delayimports32.test b/lld/test/COFF/delayimports32.test
index b684d4105e97b..0fc90200c1bfa 100644
--- a/lld/test/COFF/delayimports32.test
+++ b/lld/test/COFF/delayimports32.test
@@ -1,6 +1,6 @@
 # REQUIRES: x86
 # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj
-# RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \
+# RUN: lld-link -safeseh:no %t.obj %p/Inputs/std32.lib /subsystem:console \
 # RUN:   /entry:main@0 /alternatename:___delayLoadHelper2@8=_main@0 \
 # RUN:   /delayload:std32.dll /out:%t.exe
 # RUN: llvm-readobj --coff-imports %t.exe | FileCheck -check-prefix=IMPORT %s
diff --git a/lld/test/COFF/dllexport.s b/lld/test/COFF/dllexport.s
index b5b7080d16d23..a238b70ce1b4f 100644
--- a/lld/test/COFF/dllexport.s
+++ b/lld/test/COFF/dllexport.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=i686-windows-msvc %s -o %t.obj
 
-# RUN: lld-link -entry:dllmain -dll %t.obj -out:%t.dll -implib:%t.lib
+# RUN: lld-link -safeseh:no -entry:dllmain -dll %t.obj -out:%t.dll -implib:%t.lib
 # RUN: llvm-readobj %t.lib | FileCheck -check-prefix DECORATED-IMPLIB %s
 # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix DECORATED-EXPORTS %s
 
diff --git a/lld/test/COFF/entry-drectve.test b/lld/test/COFF/entry-drectve.test
index e51e7cb201f3f..0848b0a04aeee 100644
--- a/lld/test/COFF/entry-drectve.test
+++ b/lld/test/COFF/entry-drectve.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj < %s > %t.obj
-# RUN: lld-link /subsystem:console /out:%t.exe %t.obj
+# RUN: lld-link -safeseh:no /subsystem:console /out:%t.exe %t.obj
 
 --- !COFF
 header:
diff --git a/lld/test/COFF/entry-inference332.test b/lld/test/COFF/entry-inference332.test
index 75c557af47e86..ddeaf280a9ec2 100644
--- a/lld/test/COFF/entry-inference332.test
+++ b/lld/test/COFF/entry-inference332.test
@@ -1,9 +1,9 @@
 # RUN: sed -e s/ENTRYNAME/_mainCRTStartup/ %s | yaml2obj > %t.obj
-# RUN: lld-link /subsystem:console /out:%t.exe %t.obj /verbose /nodefaultlib > %t.log 2>&1
+# RUN: lld-link -safeseh:no /subsystem:console /out:%t.exe %t.obj /verbose /nodefaultlib > %t.log 2>&1
 # RUN: FileCheck %s < %t.log
 
 # RUN: sed -e s/ENTRYNAME/?mainCRTStartup@@YAHXZ/ %s | yaml2obj > %t.obj
-# RUN: lld-link /subsystem:console /out:%t.exe %t.obj /verbose /nodefaultlib > %t.log 2>&1
+# RUN: lld-link -safeseh:no /subsystem:console /out:%t.exe %t.obj /verbose /nodefaultlib > %t.log 2>&1
 # RUN: FileCheck %s < %t.log
 
 # CHECK: Entry name inferred: _mainCRTStartup
diff --git a/lld/test/COFF/exclude-all.s b/lld/test/COFF/exclude-all.s
index e2c23368dfe63..41caece2dd94a 100644
--- a/lld/test/COFF/exclude-all.s
+++ b/lld/test/COFF/exclude-all.s
@@ -25,7 +25,7 @@ _dataSym:
 
 # RUN: yaml2obj < %p/Inputs/export.yaml > %t.obj
 #
-# RUN: lld-link -out:%t.dll -dll %t.obj -lldmingw -exclude-all-symbols -output-def:%t.def
+# RUN: lld-link -safeseh:no -out:%t.dll -dll %t.obj -lldmingw -exclude-all-symbols -output-def:%t.def
 # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix=DLLEXPORT %s
 
 # DLLEXPORT: Name: exportfn3
diff --git a/lld/test/COFF/export-all.s b/lld/test/COFF/export-all.s
index 77893193623e1..6292ed33e3583 100644
--- a/lld/test/COFF/export-all.s
+++ b/lld/test/COFF/export-all.s
@@ -42,7 +42,7 @@ __imp__unexported:
 
 # RUN: yaml2obj < %p/Inputs/export.yaml > %t.obj
 #
-# RUN: lld-link -out:%t.dll -dll %t.obj -lldmingw -export-all-symbols -output-def:%t.def
+# RUN: lld-link -safeseh:no -out:%t.dll -dll %t.obj -lldmingw -export-all-symbols -output-def:%t.def
 # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix=CHECK2 %s
 # RUN: cat %t.def | FileCheck -check-prefix=CHECK2-DEF %s
 
@@ -69,7 +69,7 @@ __imp__unexported:
 # RUN: llvm-ar rcs %T/libs/libmingwex.a %T/libs/mingwfunc.o
 # RUN: echo -e ".global crtfunc\n.text\ncrtfunc:\nret\n" > %T/libs/crtfunc.s
 # RUN: llvm-mc -triple=x86_64-windows-gnu %T/libs/crtfunc.s -filetype=obj -o %T/libs/crt2.o
-# RUN: lld-link -out:%t.dll -dll -entry:DllMainCRTStartup %t.main.obj -lldmingw %T/libs/crt2.o %T/libs/libmingwex.a -output-def:%t.def
+# RUN: lld-link -safeseh:no -out:%t.dll -dll -entry:DllMainCRTStartup %t.main.obj -lldmingw %T/libs/crt2.o %T/libs/libmingwex.a -output-def:%t.def
 # RUN: echo "EOF" >> %t.def
 # RUN: cat %t.def | FileCheck -check-prefix=CHECK-EXCLUDE %s
 
@@ -80,7 +80,7 @@ __imp__unexported:
 # Test that libraries included with -wholearchive: are autoexported, even if
 # they are in a library that otherwise normally would be excluded.
 
-# RUN: lld-link -out:%t.dll -dll -entry:DllMainCRTStartup %t.main.obj -lldmingw %T/libs/crt2.o -wholearchive:%T/libs/libmingwex.a -output-def:%t.def
+# RUN: lld-link -safeseh:no -out:%t.dll -dll -entry:DllMainCRTStartup %t.main.obj -lldmingw %T/libs/crt2.o -wholearchive:%T/libs/libmingwex.a -output-def:%t.def
 # RUN: echo "EOF" >> %t.def
 # RUN: cat %t.def | FileCheck -check-prefix=CHECK-WHOLEARCHIVE %s
 
diff --git a/lld/test/COFF/export-stdcall.s b/lld/test/COFF/export-stdcall.s
index 6ed3e88032435..aa39eaecf6b93 100644
--- a/lld/test/COFF/export-stdcall.s
+++ b/lld/test/COFF/export-stdcall.s
@@ -1,6 +1,6 @@
 # REQUIRES: x86
 # RUN: llvm-mc -triple i686-windows-msvc %s -o %t.obj -filetype=obj
-# RUN: lld-link %t.obj -out:%t.dll -dll -nodefaultlib -noentry -export:foo_std=bar_std -export:foo_fast=bar_fast
+# RUN: lld-link -safeseh:no %t.obj -out:%t.dll -dll -nodefaultlib -noentry -export:foo_std=bar_std -export:foo_fast=bar_fast
 # RUN: llvm-nm %t.lib | FileCheck %s
 
 # MSVC fudges the lookup of 'bar' to allow it to find the stdcall function
diff --git a/lld/test/COFF/export32.test b/lld/test/COFF/export32.test
index 250c305d4d2e6..1251d43aacbc7 100644
--- a/lld/test/COFF/export32.test
+++ b/lld/test/COFF/export32.test
@@ -1,9 +1,9 @@
 # RUN: yaml2obj < %s > %t.obj
 #
-# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2
 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK1 %s
 #
-# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 /merge:.edata=.rdata
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 /merge:.edata=.rdata
 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK1 %s
 # RUN: llvm-readobj --file-headers --sections %t.dll | FileCheck -check-prefix=HEADER-MERGE %s
 
@@ -20,7 +20,7 @@
 # HEADER-MERGE-NEXT: VirtualSize: 0x7E
 # HEADER-MERGE-NEXT: VirtualAddress: 0x2000
 
-# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1,@5 \
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1,@5 \
 # RUN:   /export:exportfn2 /export:mangled
 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK2 %s
 
@@ -37,7 +37,7 @@
 # CHECK2-NEXT:       7   0x1010  exportfn3
 # CHECK2-NEXT:       8   0x1010  mangled
 
-# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1,@5,noname /export:exportfn2
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1,@5,noname /export:exportfn2
 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK3 %s
 
 # CHECK3:      Export Table:
@@ -51,7 +51,7 @@
 # CHECK3-NEXT:       5   0x1008
 # CHECK3-NEXT:       6   0x1010  exportfn2
 
-# RUN: lld-link /out:%t.dll /dll %t.obj /export:f1=exportfn1 /export:f2=exportfn2
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:f1=exportfn1 /export:f2=exportfn2
 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK4 %s
 
 # CHECK4:      Export Table:
@@ -64,12 +64,12 @@
 
 # RUN: echo "EXPORTS exportfn1 @3" > %t.def
 # RUN: echo "fn2=exportfn2 @2" >> %t.def
-# RUN: lld-link /out:%t.dll /dll %t.obj /def:%t.def
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /def:%t.def
 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK5 %s
 
 # RUN: echo "EXPORTS exportfn1 @ 3" > %t.def
 # RUN: echo "fn2=exportfn2 @ 2" >> %t.def
-# RUN: lld-link /out:%t.dll /dll %t.obj /def:%t.def
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /def:%t.def
 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK5 %s
 
 # CHECK5:      Export Table:
@@ -81,14 +81,14 @@
 # CHECK5-NEXT:       3   0x1008  exportfn1
 # CHECK5-NEXT:       4   0x1010  exportfn3
 
-# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 \
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 \
 # RUN:   /export:exportfn1 /export:exportfn2,@5 >& %t.log
 # RUN: FileCheck -check-prefix=CHECK6 %s < %t.log
 
 # CHECK6:     duplicate /export option: _exportfn2
 # CHECK6-NOT: duplicate /export option: _exportfn1
 
-# RUN: lld-link /out:%t.dll /dll %t.obj /export:foo=mangled
+# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:foo=mangled
 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK7 %s
 
 # CHECK7:      Export Table:
diff --git a/lld/test/COFF/fixed.test b/lld/test/COFF/fixed.test
index 7a5d9e6ea04db..e162570dfc765 100644
--- a/lld/test/COFF/fixed.test
+++ b/lld/test/COFF/fixed.test
@@ -1,21 +1,21 @@
 # REQUIRES: x86
 # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj
 #
-# RUN: lld-link %t.obj /fixed %p/Inputs/std32.lib /subsystem:console \
+# RUN: lld-link -safeseh:no %t.obj /fixed %p/Inputs/std32.lib /subsystem:console \
 # RUN:   /entry:main@0 /debug /out:%t.fixed.exe
 # RUN: llvm-readobj --file-headers %t.fixed.exe | \
 # RUN:   FileCheck -check-prefix=EXEFIXED %s
 #
-# RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \
+# RUN: lld-link -safeseh:no %t.obj %p/Inputs/std32.lib /subsystem:console \
 # RUN:   /entry:main@0 /debug /out:%t.exe
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=EXEREL %s
 #
 # RUN: yaml2obj < %p/Inputs/export.yaml > %t.obj
 #
-# RUN: lld-link %t.obj /dll /fixed /debug /out:%t.fixed.dll
+# RUN: lld-link -safeseh:no %t.obj /dll /fixed /debug /out:%t.fixed.dll
 # RUN: llvm-readobj --file-headers %t.fixed.dll | FileCheck -check-prefix=DLLFIXED %s
 #
-# RUN: lld-link %t.obj /dll /debug /out:%t.dll
+# RUN: lld-link -safeseh:no %t.obj /dll /debug /out:%t.dll
 # RUN: llvm-readobj --file-headers %t.dll | FileCheck -check-prefix=DLLREL %s
 
 EXEFIXED-NOT: IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE
diff --git a/lld/test/COFF/gfids-relocations32.s b/lld/test/COFF/gfids-relocations32.s
index 2b0e425a1c540..6c51f7aca8207 100644
--- a/lld/test/COFF/gfids-relocations32.s
+++ b/lld/test/COFF/gfids-relocations32.s
@@ -1,6 +1,6 @@
 # REQUIRES: x86
 # RUN: llvm-mc -triple i686-pc-win32 %s -filetype=obj -o %t.obj
-# RUN: lld-link %t.obj -guard:cf -out:%t.exe -entry:main
+# RUN: lld-link -safeseh:no %t.obj -guard:cf -out:%t.exe -entry:main
 # RUN: llvm-readobj --coff-load-config %t.exe | FileCheck %s --check-prefix=CHECK
 
 # Only f and _main should go in the table.
diff --git a/lld/test/COFF/hello32.test b/lld/test/COFF/hello32.test
index b53264ac6af81..61418d3d24bb5 100644
--- a/lld/test/COFF/hello32.test
+++ b/lld/test/COFF/hello32.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj
-# RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \
+# RUN: lld-link -safeseh:no %t.obj %p/Inputs/std32.lib /subsystem:console \
 # RUN:   /entry:main@0 /out:%t.exe /appcontainer
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=HEADER %s
 # RUN: llvm-readobj --coff-imports %t.exe | FileCheck -check-prefix=IMPORTS %s
@@ -42,10 +42,9 @@ HEADER-NEXT:   MinorSubsystemVersion: 0
 HEADER-NEXT:   SizeOfImage: 20480
 HEADER-NEXT:   SizeOfHeaders: 1024
 HEADER-NEXT:   Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI (0x3)
-HEADER-NEXT:   Characteristics [ (0x9540)
+HEADER-NEXT:   Characteristics [ (0x9140)
 HEADER-NEXT:     IMAGE_DLL_CHARACTERISTICS_APPCONTAINER (0x1000)
 HEADER-NEXT:     IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE (0x40)
-HEADER-NEXT:     IMAGE_DLL_CHARACTERISTICS_NO_SEH (0x400)
 HEADER-NEXT:     IMAGE_DLL_CHARACTERISTICS_NX_COMPAT (0x100)
 HEADER-NEXT:     IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE (0x8000)
 HEADER-NEXT:   ]
diff --git a/lld/test/COFF/largeaddressaware.test b/lld/test/COFF/largeaddressaware.test
index 4c2ae56cc5abc..ddd37131a65db 100644
--- a/lld/test/COFF/largeaddressaware.test
+++ b/lld/test/COFF/largeaddressaware.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj
-# RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \
+# RUN: lld-link -safeseh:no %t.obj %p/Inputs/std32.lib /subsystem:console \
 # RUN:   /entry:main@0 /out:%t.exe /largeaddressaware
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=HEADER %s
 
diff --git a/lld/test/COFF/loadcfg32.test b/lld/test/COFF/loadcfg32.test
index e211c95e7db09..dffdab8b146e6 100644
--- a/lld/test/COFF/loadcfg32.test
+++ b/lld/test/COFF/loadcfg32.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj < %s > %t.obj
-# RUN: lld-link /out:%t.exe %t.obj /entry:main /subsystem:console
+# RUN: lld-link -safeseh:no /out:%t.exe %t.obj /entry:main /subsystem:console
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck %s
 
 # CHECK: LoadConfigTableRVA: 0x2000
diff --git a/lld/test/COFF/locally-imported32.test b/lld/test/COFF/locally-imported32.test
index 789c8c8f8094d..993099d423a5c 100644
--- a/lld/test/COFF/locally-imported32.test
+++ b/lld/test/COFF/locally-imported32.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj < %s > %t.obj
-# RUN: lld-link /out:%t.exe /entry:main %t.obj
+# RUN: lld-link -safeseh:no /out:%t.exe /entry:main %t.obj
 # RUN: llvm-objdump -s %t.exe | FileCheck %s
 
 # CHECK:      Contents of section .text:
diff --git a/lld/test/COFF/machine.test b/lld/test/COFF/machine.test
index 2ac276f1ba7cd..921b7e3931493 100644
--- a/lld/test/COFF/machine.test
+++ b/lld/test/COFF/machine.test
@@ -1,16 +1,16 @@
 # RUN: yaml2obj %p/Inputs/machine-x64.yaml > %t.obj
-# RUN: lld-link /entry:main /subsystem:console /out:%t.exe %t.obj
+# RUN: lld-link -safeseh:no /entry:main /subsystem:console /out:%t.exe %t.obj
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=AMD64 %s
-# RUN: lld-link /entry:main /subsystem:console /machine:x64 \
+# RUN: lld-link -safeseh:no /entry:main /subsystem:console /machine:x64 \
 # RUN:   /out:%t.exe %t.obj
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=AMD64 %s
 
 AMD64: Machine: IMAGE_FILE_MACHINE_AMD64
 
 # RUN: yaml2obj %p/Inputs/machine-x86.yaml > %t.obj
-# RUN: lld-link /entry:main /subsystem:console /out:%t.exe %t.obj
+# RUN: lld-link -safeseh:no /entry:main /subsystem:console /out:%t.exe %t.obj
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=I386 %s
-# RUN: lld-link /entry:main /subsystem:console /machine:x86 \
+# RUN: lld-link -safeseh:no /entry:main /subsystem:console /machine:x86 \
 # RUN:   /out:%t.exe %t.obj /fixed
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=I386 %s
 
diff --git a/lld/test/COFF/no-ipi-stream.test b/lld/test/COFF/no-ipi-stream.test
index 246c35907058f..e9b2a31bc0a7c 100644
--- a/lld/test/COFF/no-ipi-stream.test
+++ b/lld/test/COFF/no-ipi-stream.test
@@ -1,4 +1,4 @@
 # RUN: rm -rf %t && mkdir %t
 # RUN: yaml2obj < %p/Inputs/no-ipi-stream-obj.obj.yaml > %t/no-ipi-stream-obj.obj
 # RUN: llvm-pdbutil yaml2pdb %p/Inputs/no-ipi-stream-pdb.pdb.yaml -pdb=%t/no-ipi-stream-pdb.pdb
-# RUN: lld-link /dll /noentry /debug %t/no-ipi-stream-obj.obj
+# RUN: lld-link -safeseh:no /dll /noentry /debug %t/no-ipi-stream-obj.obj
diff --git a/lld/test/COFF/order-i386.test b/lld/test/COFF/order-i386.test
index 4cde5fa813f74..acd9ad2aa2c95 100644
--- a/lld/test/COFF/order-i386.test
+++ b/lld/test/COFF/order-i386.test
@@ -3,12 +3,12 @@
 # RUN: echo fn1 > %t.order
 # RUN: echo fn2 >> %t.order
 
-# RUN: lld-link -entry:fn1 -subsystem:console -opt:noref %t.obj \
+# RUN: lld-link -safeseh:no -entry:fn1 -subsystem:console -opt:noref %t.obj \
 # RUN:   -lldmap:- -out:%t.exe -order:@%t.order | FileCheck %s
 # CHECK: fn1
 # CHECK: fn2
 
-# RUN: lld-link -entry:fn1 -subsystem:console -opt:noref %t.obj \
+# RUN: lld-link -safeseh:no -entry:fn1 -subsystem:console -opt:noref %t.obj \
 # RUN:   -lldmap:- -out:%t.exe | FileCheck -check-prefix=DEFAULT %s
 # DEFAULT: fn2
 # DEFAULT: fn1
diff --git a/lld/test/COFF/pdb-debug-f.s b/lld/test/COFF/pdb-debug-f.s
index 624c1192914aa..ccc34558c832b 100644
--- a/lld/test/COFF/pdb-debug-f.s
+++ b/lld/test/COFF/pdb-debug-f.s
@@ -1,6 +1,6 @@
 # REQUIRES: x86
 # RUN: llvm-mc -triple=i386-pc-win32 -filetype=obj -o %t.obj %s
-# RUN: lld-link /subsystem:console /debug /nodefaultlib /entry:foo /out:%t.exe /pdb:%t.pdb %t.obj
+# RUN: lld-link -safeseh:no /subsystem:console /debug /nodefaultlib /entry:foo /out:%t.exe /pdb:%t.pdb %t.obj
 # RUN: llvm-pdbutil dump -fpo %t.pdb | FileCheck %s
 
 # CHECK:                         Old FPO Data
diff --git a/lld/test/COFF/pdb-lib.s b/lld/test/COFF/pdb-lib.s
index dacf5f27a319b..09f1892069a45 100644
--- a/lld/test/COFF/pdb-lib.s
+++ b/lld/test/COFF/pdb-lib.s
@@ -3,7 +3,7 @@
 # RUN: llvm-mc -filetype=obj -triple=i686-windows-msvc %s -o foo.obj
 # RUN: llc %S/Inputs/bar.ll -filetype=obj -mtriple=i686-windows-msvc -o bar.obj
 # RUN: llvm-lib bar.obj -out:bar.lib
-# RUN: lld-link -debug -pdb:foo.pdb foo.obj bar.lib -out:foo.exe -entry:main
+# RUN: lld-link -safeseh:no -debug -pdb:foo.pdb foo.obj bar.lib -out:foo.exe -entry:main
 # RUN: llvm-pdbutil dump -modules %t/foo.pdb | FileCheck %s
 
 # Make sure that the PDB has module descriptors. foo.obj and bar.lib should be
diff --git a/lld/test/COFF/pdb-safeseh.yaml b/lld/test/COFF/pdb-safeseh.yaml
index 27948e38d3d3a..cc7ddb19a49c6 100644
--- a/lld/test/COFF/pdb-safeseh.yaml
+++ b/lld/test/COFF/pdb-safeseh.yaml
@@ -1,5 +1,5 @@
 # RUN: yaml2obj %s -o %t.obj
-# RUN: lld-link -debug -entry:main -out:%t.exe -pdb:%t.pdb %t.obj
+# RUN: lld-link -safeseh:no -debug -entry:main -out:%t.exe -pdb:%t.pdb %t.obj
 # RUN: llvm-pdbutil dump -globals %t.pdb | FileCheck %s
 
 # There is an S_GDATA32 symbol record with .secrel32 and .secidx relocations in
diff --git a/lld/test/COFF/pdb-unknown-subsection.s b/lld/test/COFF/pdb-unknown-subsection.s
index b64ed0373c633..10ffa46ded3f6 100644
--- a/lld/test/COFF/pdb-unknown-subsection.s
+++ b/lld/test/COFF/pdb-unknown-subsection.s
@@ -3,7 +3,7 @@
 
 # REQUIRES: x86
 # RUN: llvm-mc -triple=i386-pc-win32 -filetype=obj -o %t.obj %s
-# RUN: lld-link -subsystem:console -debug -nodefaultlib -entry:foo -out:%t.exe -pdb:%t.pdb %t.obj 2>&1 | FileCheck %s --check-prefix=WARNING
+# RUN: lld-link -safeseh:no -subsystem:console -debug -nodefaultlib -entry:foo -out:%t.exe -pdb:%t.pdb %t.obj 2>&1 | FileCheck %s --check-prefix=WARNING
 # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s
 
 # WARNING-NOT: ignoring unknown
diff --git a/lld/test/COFF/reloc-x86.test b/lld/test/COFF/reloc-x86.test
index bd500be164a71..99547d1cd1452 100644
--- a/lld/test/COFF/reloc-x86.test
+++ b/lld/test/COFF/reloc-x86.test
@@ -1,6 +1,6 @@
 # REQUIRES: x86
 # RUN: yaml2obj < %s > %t.obj
-# RUN: lld-link /out:%t.exe /entry:main /base:0x400000 %t.obj
+# RUN: lld-link -safeseh:no /out:%t.exe /entry:main /base:0x400000 %t.obj
 # RUN: llvm-objdump -d %t.exe | FileCheck %s
 
 # CHECK: .text:
diff --git a/lld/test/COFF/safeseh-no.s b/lld/test/COFF/safeseh-no.s
new file mode 100644
index 0000000000000..2a301a3ba9b83
--- /dev/null
+++ b/lld/test/COFF/safeseh-no.s
@@ -0,0 +1,56 @@
+# RUN: llvm-mc -triple i686-windows-msvc %s -filetype=obj -o %t.obj
+# RUN: not lld-link %t.obj -safeseh -out:%t.exe -entry:main 2>&1 | FileCheck %s --check-prefix=ERROR
+# safe seh should be on by default.
+# RUN: not lld-link %t.obj -out:%t.exe -entry:main 2>&1 | FileCheck %s --check-prefix=ERROR
+# RUN: lld-link %t.obj -safeseh:no -out:%t.exe -entry:main
+# RUN: llvm-readobj --file-headers --coff-load-config %t.exe | FileCheck %s
+# -lldmingw should also turn off safeseh by default.
+# RUN: lld-link %t.obj -lldmingw -out:%t.exe -entry:main
+# RUN: llvm-readobj --file-headers --coff-load-config %t.exe | FileCheck %s
+
+# ERROR: /safeseh: {{.*}}safeseh-no.s.tmp.obj is not compatible with SEH
+
+# CHECK: Characteristics [
+# CHECK-NOT:   IMAGE_DLL_CHARACTERISTICS_NO_SEH
+# CHECK: ]
+# CHECK: LoadConfig [
+# CHECK:   Size: 0x48
+# CHECK:   SEHandlerTable: 0x0
+# CHECK:   SEHandlerCount: 0
+# CHECK: ]
+# CHECK-NOT: SEHTable
+
+
+# Explicitly mark the object as not having safeseh. LLD should error unless
+# -safeseh:no is passed.
+        .def     @feat.00; .scl    3; .type   0; .endef
+        .globl  @feat.00
+@feat.00 = 0
+
+        .def     _main;
+        .scl    2;
+        .type   32;
+        .endef
+        .section        .text,"xr",one_only,_main
+        .globl  _main
+_main:
+        movl $42, %eax
+        ret
+
+# Add a handler to create an .sxdata section, which -safeseh:no should ignore.
+        .def     _my_handler; .scl    3; .type   32;
+        .endef
+        .section        .text,"xr",one_only,_my_handler
+_my_handler:
+        ret
+.safeseh _my_handler
+
+
+        .section .rdata,"dr"
+.globl __load_config_used
+__load_config_used:
+        .long 72
+        .fill 60, 1, 0
+        .long ___safe_se_handler_table
+        .long ___safe_se_handler_count
+
diff --git a/lld/test/COFF/subsystem-drectve.test b/lld/test/COFF/subsystem-drectve.test
index 45d48518a99db..68630eb2d82fb 100644
--- a/lld/test/COFF/subsystem-drectve.test
+++ b/lld/test/COFF/subsystem-drectve.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj < %s > %t.obj
-# RUN: lld-link /dll /noentry /out:%t.dll %t.obj
+# RUN: lld-link -safeseh:no /dll /noentry /out:%t.dll %t.obj
 # RUN: llvm-readobj --file-headers %t.dll | FileCheck %s
 
 # CHECK: MajorOperatingSystemVersion: 42
diff --git a/lld/test/COFF/subsystem-inference32.test b/lld/test/COFF/subsystem-inference32.test
index 23bcf0da6e650..d213550786bd9 100644
--- a/lld/test/COFF/subsystem-inference32.test
+++ b/lld/test/COFF/subsystem-inference32.test
@@ -1,17 +1,17 @@
 # RUN: sed -e s/ENTRYNAME/_main/ %s | yaml2obj > %t.obj
-# RUN: lld-link /out:%t.exe %t.obj
+# RUN: lld-link -safeseh:no /out:%t.exe %t.obj
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=MAIN %s
 
 # RUN: sed s/ENTRYNAME/_wmain/ %s | yaml2obj > %t.obj
-# RUN: lld-link /out:%t.exe %t.obj
+# RUN: lld-link -safeseh:no /out:%t.exe %t.obj
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=WMAIN %s
 
 # RUN: sed s/ENTRYNAME/_WinMain@16/ %s | yaml2obj > %t.obj
-# RUN: lld-link /out:%t.exe %t.obj
+# RUN: lld-link -safeseh:no /out:%t.exe %t.obj
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=WINMAIN %s
 
 # RUN: sed s/ENTRYNAME/_wWinMain@16/ %s | yaml2obj > %t.obj
-# RUN: lld-link /out:%t.exe %t.obj
+# RUN: lld-link -safeseh:no /out:%t.exe %t.obj
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=WWINMAIN %s
 
 # MAIN:     Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI
diff --git a/lld/test/COFF/tls32.test b/lld/test/COFF/tls32.test
index f3db2615ea47b..462cec06b2097 100644
--- a/lld/test/COFF/tls32.test
+++ b/lld/test/COFF/tls32.test
@@ -1,5 +1,5 @@
 # RUN: yaml2obj < %s > %t.obj
-# RUN: lld-link /out:%t.exe /entry:main %t.obj
+# RUN: lld-link -safeseh:no /out:%t.exe /entry:main %t.obj
 # RUN: llvm-readobj --file-headers %t.exe | FileCheck %s
 
 # CHECK: TLSTableRVA: 0x1000

From 8f8d07e93bf891bf67329efb8f8d8609bf77f1c0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 18:21:25 +0000
Subject: [PATCH 265/451] AMDGPU: Replace store PatFrags

Convert the easy cases to formats understood for GlobalISel.

llvm-svn: 366240
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 44 ++++++++++++++------
 llvm/lib/Target/AMDGPU/FLATInstructions.td   |  4 +-
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index d470b3cd51486..61bc415c839da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -467,25 +467,48 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
   let MemoryVT = i64;
 }
 
+def store_#as : PatFrag<(ops node:$val, node:$ptr),
+                    (unindexedstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let IsTruncStore = 0;
+}
+
+// truncstore fragments.
+def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
+                             (unindexedstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let IsTruncStore = 1;
+}
+
+// TODO: We don't really need the truncstore here. We can use
+// unindexedstore with MemoryVT directly, which will save an
+// unnecessary check that the memory size is less than the value type
+// in the generated matcher table.
+def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
+                               (truncstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let MemoryVT = i8;
+}
+
+def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
+                                (truncstore node:$val, node:$ptr)> {
+  let IsStore = 1;
+  let MemoryVT = i16;
+}
+
+defm atomic_store_#as : binary_atomic_op<atomic_store>;
+
 } // End let AddressSpaces = ...
 } // End foreach AddrSpace
 
-def store_private : PrivateStore <store>;
-def truncstorei8_private : PrivateStore<truncstorei8>;
-def truncstorei16_private : PrivateStore <truncstorei16>;
+
 def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
 def truncstorei8_hi16_private : StoreHi16<truncstorei8>, PrivateAddress;
 
-def store_global : GlobalStore <store>;
-def truncstorei8_global : GlobalStore <truncstorei8>;
-def truncstorei16_global : GlobalStore <truncstorei16>;
 def store_atomic_global : GlobalStore<atomic_store>;
 def truncstorei8_hi16_global : StoreHi16 <truncstorei8>, GlobalAddress;
 def truncstorei16_hi16_global : StoreHi16 <truncstorei16>, GlobalAddress;
 
-def store_local : LocalStore <store>;
-def truncstorei8_local : LocalStore <truncstorei8>;
-def truncstorei16_local : LocalStore <truncstorei16>;
 def store_local_hi16 : StoreHi16 <truncstorei16>, LocalAddress;
 def truncstorei8_local_hi16 : StoreHi16<truncstorei8>, LocalAddress;
 def atomic_store_local : LocalStore <atomic_store>;
@@ -506,9 +529,6 @@ def store_align16_local : Aligned16Bytes <
   (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr)
 >;
 
-def store_flat         : FlatStore <store>;
-def truncstorei8_flat  : FlatStore <truncstorei8>;
-def truncstorei16_flat : FlatStore <truncstorei16>;
 def atomic_store_flat  : FlatStore <atomic_store>;
 def truncstorei8_hi16_flat  : StoreHi16<truncstorei8>, FlatStoreAddress;
 def truncstorei16_hi16_flat : StoreHi16<truncstorei16>, FlatStoreAddress;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 9d541560613cc..8ddf4e2aa2b26 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -792,8 +792,8 @@ def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32>;
 def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
 def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32>;
 
-def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat, i64>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;
 
 def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
 def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;

From 6e1c3bb181b754f92501ee85f157345e25769317 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Tue, 16 Jul 2019 18:23:49 +0000
Subject: [PATCH 266/451] [IndVars] Speculative fix for an assertion failure
 seen in bots

I don't have an IR sample that actually fails, but the issue described in the code comment added below is theoretically possible and should be guarded against even if the bot failures turn out to have a different root cause.

llvm-svn: 366241
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 70508bf752580..f9fc698a4a9bc 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -2810,7 +2810,12 @@ bool IndVarSimplify::run(Loop *L) {
       if (isa<SCEVCouldNotCompute>(ExitCount))
         continue;
 
-      assert(!ExitCount->isZero() && "Should have been folded above");
+      // This was handled above, but as we form SCEVs, we can sometimes refine
+      // existing ones; this allows exit counts to be folded to zero which
+      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
+      // until stable to handle cases like this better.
+      if (ExitCount->isZero())
+        continue;
       
       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
       if (!IndVar)

From 7eb1902cd54d5715a3e3c096d9624bda749d26a5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 18:26:42 +0000
Subject: [PATCH 267/451] AMDGPU: Add register classes to flat store patterns

For some reason GlobalISelEmitter needs register classes to import
these, although it works for the load patterns.

llvm-svn: 366242
---
 llvm/lib/Target/AMDGPU/FLATInstructions.td | 50 +++++++++++-----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 8ddf4e2aa2b26..889f60dae9204 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -705,47 +705,47 @@ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCN
 >;
 
 class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
+  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
   (inst $vaddr, $offset, 0, 0, $slc, $in)
 >;
 
 class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
+  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
   (inst $vaddr, $offset, 0, 0, $slc, $in)
 >;
 
 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
+  (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
   (inst $vaddr, $offset, 0, 0, $slc)
 >;
 
 class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
+  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
   (inst $vaddr, $offset, 0, 0, $slc)
 >;
 
-class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
   (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
-  (inst $vaddr, $data, $offset, 0, 0, $slc)
+  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
 >;
 
-class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
   (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
-  (inst $vaddr, $data, $offset, 0, 0, $slc)
+  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
 >;
 
-class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 0, 0, $slc)
+  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
 >;
 
-class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 0, 0, $slc)
+  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
 >;
 
 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
@@ -788,12 +788,12 @@ def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
 def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
 def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
 def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
+def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
 
 def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
 
 def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
 def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
@@ -871,14 +871,14 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
 def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
 def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
 
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
-def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32, VReg_64>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
+def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
 
 let OtherPredicates = [D16PreservesUnusedBits] in {
 def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
@@ -900,7 +900,7 @@ def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
 }
 
 def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>;
-def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64>;
+def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>;
 
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>;
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>;

From 5826ab6b0c9cf7e0dbafc164c4cca1404c29ed09 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 16 Jul 2019 18:27:12 +0000
Subject: [PATCH 268/451] [CMake] Fail when Python interpreter doesn't match
 Python libraries version

Because of how CMake finds the Python libraries and interpreter, it's
possible to end up with a discrepancy between the two. For example,
you'd end up using a Python 3 interpreter to run the test suite while
LLDB was built and linked against Python 2.

This patch adds a fatal error to CMake so we find out at configuration
time, instead of finding out at test time.

Differential revision: https://reviews.llvm.org/D64812

llvm-svn: 366243
---
 lldb/cmake/modules/LLDBConfig.cmake | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index ef9356591c301..26a1c7a72cc08 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -185,7 +185,6 @@ function(find_python_libs_windows)
 endfunction(find_python_libs_windows)
 
 if (NOT LLDB_DISABLE_PYTHON)
-
   if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
     find_python_libs_windows()
 
@@ -194,8 +193,12 @@ if (NOT LLDB_DISABLE_PYTHON)
       add_definitions( -DLLDB_PYTHON_HOME="${LLDB_PYTHON_HOME}" )
     endif()
   else()
-    find_package(PythonInterp)
-    find_package(PythonLibs)
+    find_package(PythonInterp REQUIRED)
+    find_package(PythonLibs REQUIRED)
+  endif()
+
+  if (NOT PYTHON_VERSION_STRING VERSION_EQUAL PYTHONLIBS_VERSION_STRING)
+    message(FATAL_ERROR "Found incompatible Python interpreter (${PYTHON_VERSION_STRING}) and Python libraries (${PYTHONLIBS_VERSION_STRING})")
   endif()
 
   if (PYTHON_INCLUDE_DIR)

From 4b6f69fe906e173603f1613bd84450486f63b3ee Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 16 Jul 2019 18:33:13 +0000
Subject: [PATCH 269/451] Fix linkrepro.test after safeseh:no change

Add the @feat.00 flag to the input.

llvm-svn: 366244
---
 lld/test/COFF/Inputs/hello32.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lld/test/COFF/Inputs/hello32.yaml b/lld/test/COFF/Inputs/hello32.yaml
index 09e76f144532b..c01c4c6ba56bb 100644
--- a/lld/test/COFF/Inputs/hello32.yaml
+++ b/lld/test/COFF/Inputs/hello32.yaml
@@ -79,4 +79,10 @@ symbols:
     SimpleType:      IMAGE_SYM_TYPE_NULL
     ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
     StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            '@feat.00'
+    Value:           1
+    SectionNumber:   -1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
 ...

From 11dc3d371124f329762c0f2d9a75a5a82bd00b1a Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 16 Jul 2019 18:34:46 +0000
Subject: [PATCH 270/451] Mark new test as requiring an x86 backend for LTO
 native object generation

llvm-svn: 366245
---
 lld/test/COFF/undefined-symbol-lto.test | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lld/test/COFF/undefined-symbol-lto.test b/lld/test/COFF/undefined-symbol-lto.test
index 700ec650857ca..41957168baf60 100644
--- a/lld/test/COFF/undefined-symbol-lto.test
+++ b/lld/test/COFF/undefined-symbol-lto.test
@@ -1,3 +1,5 @@
+REQUIRES: x86
+
 RUN: rm -rf %t && mkdir -p %t && cd %t
 RUN: llvm-as %S/Inputs/undefined-symbol-lto-a.ll -o t.obj
 RUN: llvm-as %S/Inputs/undefined-symbol-lto-b.ll -o b.obj

From dad1f89210bff2a45e23fb2a6c31ffb247450b23 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 18:42:53 +0000
Subject: [PATCH 271/451] AMDGPU/GlobalISel: Select flat stores

llvm-svn: 366246
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |   6 +-
 .../AMDGPU/GlobalISel/inst-select-copy.mir    |  16 +-
 .../GlobalISel/inst-select-implicit-def.mir   |  10 +-
 .../GlobalISel/inst-select-store-flat.mir     | 837 +++++++++++++++++-
 .../GlobalISel/inst-select-store-global.mir   | 817 +++++++++++++++++
 .../GlobalISel/llvm.amdgcn.end.cf.i32.ll      |   8 +-
 .../GlobalISel/llvm.amdgcn.end.cf.i64.ll      |   6 +-
 .../GlobalISel/llvm.amdgcn.if.break.i32.ll    |   4 +-
 8 files changed, 1650 insertions(+), 54 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f8f89593d0805..25e72bbe75abf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -856,7 +856,7 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
   unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
   unsigned Opcode;
 
-  // FIXME: Select store instruction based on address space
+  // FIXME: Remove this when integers > s32 naturally selected.
   switch (StoreSize) {
   default:
     return false;
@@ -1363,6 +1363,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_SELECT:
     return selectG_SELECT(I);
   case TargetOpcode::G_STORE:
+    if (selectImpl(I, CoverageInfo))
+      return true;
     return selectG_STORE(I);
   case TargetOpcode::G_TRUNC:
     return selectG_TRUNC(I);
@@ -1545,7 +1547,7 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
   }};
 }
 
-  template <bool Signed>
+template <bool Signed>
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
   MachineInstr *MI = Root.getParent();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
index 2f2ad31cd0ad7..558f672c2089c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir
@@ -17,13 +17,13 @@ body: |
     ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
     ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
     ; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
     ; WAVE32-LABEL: name: copy
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
     ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
     ; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; WAVE32: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; WAVE32: GLOBAL_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %0:sgpr(p1) = COPY $sgpr2_sgpr3
     %1:vgpr(p1) = COPY %0
     %2:vgpr(s32) = G_IMPLICIT_DEF
@@ -46,7 +46,7 @@ body: |
     ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
     ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
     ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
-    ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
     ; WAVE32-LABEL: name: copy_vcc_bank_scc_bank
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -55,7 +55,7 @@ body: |
     ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
     ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
-    ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s32) = COPY $vgpr3
@@ -83,7 +83,7 @@ body: |
     ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
     ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
     ; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
-    ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
     ; WAVE32-LABEL: name: copy_vcc_bank_scc_bank_2_uses
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -94,7 +94,7 @@ body: |
     ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY4]], implicit $exec
     ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
     ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
-    ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s32) = COPY $vgpr3
@@ -122,7 +122,7 @@ body: |
     ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
     ; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc
     ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
-    ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
     ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg
     ; WAVE32: $vcc_hi = IMPLICIT_DEF
     ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -130,7 +130,7 @@ body: |
     ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
     ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
     ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
-    ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s32) = COPY $vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
index 43bd32644ff70..5e14f7e50083f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir
@@ -104,7 +104,7 @@ body: |
     ; GCN-LABEL: name: implicit_def_p1_vgpr
     ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
-    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
     %0:vgpr(p1) = G_IMPLICIT_DEF
     %1:vgpr(s32) = G_CONSTANT i32 4
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -119,9 +119,9 @@ regBankSelected: true
 body: |
   bb.0:
     ; GCN-LABEL: name: implicit_def_p3_vgpr
-    ; GCN: [[DEF:%[0-9]+]]:vgpr(p3) = G_IMPLICIT_DEF
-    ; GCN: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4
-    ; GCN: G_STORE [[C]](s32), [[DEF]](p3) :: (store 4, addrspace 1)
+    ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
     %0:vgpr(p3) = G_IMPLICIT_DEF
     %1:vgpr(s32) = G_CONSTANT i32 4
     G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -138,7 +138,7 @@ body: |
     ; GCN-LABEL: name: implicit_def_p4_vgpr
     ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
     ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
-    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
     %0:vgpr(p4) = G_IMPLICIT_DEF
     %1:vgpr(s32) = G_CONSTANT i32 4
     G_STORE %1, %0 :: (store 4, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
index eb8e39cd08df0..f88d8ee615f4e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -1,42 +1,827 @@
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+
+---
+
+name: store_flat_s32_to_4
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_flat_s32_to_4
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    ; GFX8-LABEL: name: store_flat_s32_to_4
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    ; GFX9-LABEL: name: store_flat_s32_to_4
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    ; GFX10-LABEL: name: store_flat_s32_to_4
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
 
---- |
-  define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0,
-                                              i64 addrspace(1)* %global1,
-                                              i96 addrspace(1)* %global2,
-                                              i128 addrspace(1)* %global3) { ret void }
 ...
+
 ---
+name: store_flat_s32_to_2
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_flat_s32_to_2
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
+    ; GFX8-LABEL: name: store_flat_s32_to_2
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
+    ; GFX9-LABEL: name: store_flat_s32_to_2
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
+    ; GFX10-LABEL: name: store_flat_s32_to_2
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 2, align 2, addrspace 0)
+
+...
 
-name:            global_addrspace
+---
+name: store_flat_s32_to_1
 legalized:       true
+tracksRegLiveness: true
 regBankSelected: true
 
-# GCN: global_addrspace
-# GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-# GCN: [[VAL4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-# GCN: [[VAL8:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
-# GCN: [[VAL12:%[0-9]+]]:vreg_96 = COPY $vgpr5_vgpr6_vgpr7
-# GCN: [[VAL16:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
-# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL4]], 0, 0, 0
-# GCN: FLAT_STORE_DWORDX2 [[PTR]], [[VAL8]], 0, 0, 0
-# GCN: FLAT_STORE_DWORDX3 [[PTR]], [[VAL12]], 0, 0, 0
-# GCN: FLAT_STORE_DWORDX4 [[PTR]], [[VAL16]], 0, 0, 0
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_flat_s32_to_1
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
+    ; GFX8-LABEL: name: store_flat_s32_to_1
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
+    ; GFX9-LABEL: name: store_flat_s32_to_1
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
+    ; GFX10-LABEL: name: store_flat_s32_to_1
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 1, align 1, addrspace 0)
+
+...
+
+---
+
+name: store_flat_s64
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
 
 body: |
   bb.0:
-    liveins:  $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
 
+    ; GFX7-LABEL: name: store_flat_s64
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_s64
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_s64
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_s64
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
+
+...
+---
+
+name: store_flat_s96
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+
+    ; GFX7-LABEL: name: store_flat_s96
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_s96
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_s96
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_s96
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
+    G_STORE %1, %0 :: (store 12, align 16, addrspace 0)
+
+...
+---
+
+name: store_flat_s128
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_flat_s128
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_s128
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_s128
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_s128
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
+
+...
+
+---
+
+name: store_flat_v2s32
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_flat_v2s32
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; GFX8-LABEL: name: store_flat_v2s32
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; GFX9-LABEL: name: store_flat_v2s32
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    ; GFX10-LABEL: name: store_flat_v2s32
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
+
+...
+---
+
+name: store_flat_v3s32
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+
+    ; GFX7-LABEL: name: store_flat_v3s32
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
+    ; GFX8-LABEL: name: store_flat_v3s32
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
+    ; GFX9-LABEL: name: store_flat_v3s32
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
+    ; GFX10-LABEL: name: store_flat_v3s32
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    G_STORE %1, %0 :: (store 12, align 16, addrspace 0)
+
+...
+---
+
+name: store_flat_v4s32
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_flat_v4s32
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+    ; GFX8-LABEL: name: store_flat_v4s32
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+    ; GFX9-LABEL: name: store_flat_v4s32
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+    ; GFX10-LABEL: name: store_flat_v4s32
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
+
+...
+
+---
+
+name: store_flat_v2s16
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_flat_v2s16
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_v2s16
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_v2s16
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_v2s16
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s16>) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
+
+...
+
+---
+
+name: store_flat_v4s16
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_flat_v4s16
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_v4s16
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_v4s16
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_v4s16
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
+
+...
+
+---
+
+name: store_flat_v6s16
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+
+    ; GFX7-LABEL: name: store_flat_v6s16
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_v6s16
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_v6s16
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_v6s16
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4
+    G_STORE %1, %0 :: (store 12, align 16, addrspace 0)
+
+...
+---
+
+name: store_flat_v8s16
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_flat_v8s16
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_v8s16
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_v8s16
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_v8s16
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
+
+...
+
+---
+
+name: store_flat_v2s64
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_flat_v2s64
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_v2s64
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_v2s64
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_v2s64
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
+
+...
+
+---
+
+name: store_flat_p1
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_flat_p1
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_p1
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_p1
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_p1
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p1) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
+
+...
+
+---
+
+name: store_flat_v2p1
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_flat_v2p1
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_v2p1
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_v2p1
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_v2p1
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
+
+...
+
+---
+
+name: store_flat_p3
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_flat_p3
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_p3
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_p3
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_p3
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
+
+...
+
+---
+
+name: store_flat_v2p3
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_flat_v2p3
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_flat_v2p3
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_flat_v2p3
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_flat_v2p3
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
+
+...
+---
+
+name: store_atomic_flat_s32
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_atomic_flat_s32
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_atomic_flat_s32
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_atomic_flat_s32
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_atomic_flat_s32
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
-    %2:vgpr(s64) = COPY $vgpr3_vgpr4
-    %3:vgpr(s96) = COPY $vgpr5_vgpr6_vgpr7
-    %4:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
-    G_STORE %1, %0 :: (store 4 into %ir.global0)
-    G_STORE %2, %0 :: (store 8 into %ir.global1)
-    G_STORE %3, %0 :: (store 12 into %ir.global2, align 16)
-    G_STORE %4, %0 :: (store 16 into %ir.global3)
+    G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 0)
 
 ...
+
 ---
+
+name: store_atomic_flat_s64
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_atomic_flat_s64
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_atomic_flat_s64
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_atomic_flat_s64
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_atomic_flat_s64
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 0)
+
+...
+
+---
+
+name: store_flat_s32_gep_2047
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_flat_s32_gep_2047
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    ; GFX8-LABEL: name: store_flat_s32_gep_2047
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    ; GFX9-LABEL: name: store_flat_s32_gep_2047
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    ; GFX10-LABEL: name: store_flat_s32_gep_2047
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s64) = G_CONSTANT i64 2047
+    %3:vgpr(p1) = G_GEP %0, %2
+    G_STORE %1, %3 :: (store 4, align 4, addrspace 0)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
new file mode 100644
index 0000000000000..2154d1cfee8cf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
@@ -0,0 +1,817 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
+
+---
+
+name: store_global_s32_to_4
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_global_s32_to_4
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+    ; GFX8-LABEL: name: store_global_s32_to_4
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+    ; GFX9-LABEL: name: store_global_s32_to_4
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    ; GFX10-LABEL: name: store_global_s32_to_4
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
+
+...
+
+---
+name: store_global_s32_to_2
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_global_s32_to_2
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
+    ; GFX8-LABEL: name: store_global_s32_to_2
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
+    ; GFX9-LABEL: name: store_global_s32_to_2
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
+    ; GFX10-LABEL: name: store_global_s32_to_2
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 2, align 2, addrspace 1)
+
+...
+
+---
+name: store_global_s32_to_1
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_global_s32_to_1
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
+    ; GFX8-LABEL: name: store_global_s32_to_1
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
+    ; GFX9-LABEL: name: store_global_s32_to_1
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
+    ; GFX10-LABEL: name: store_global_s32_to_1
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 1, align 1, addrspace 1)
+
+...
+
+---
+
+name: store_global_s64
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_global_s64
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_s64
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_s64
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_s64
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
+
+...
+---
+
+name: store_global_s96
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+
+    ; GFX7-LABEL: name: store_global_s96
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_s96
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_s96
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_s96
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
+    G_STORE %1, %0 :: (store 12, align 16, addrspace 1)
+
+...
+---
+
+name: store_global_s128
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_global_s128
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_s128
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_s128
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_s128
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
+
+...
+
+---
+
+name: store_global_v2s32
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_global_v2s32
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+    ; GFX8-LABEL: name: store_global_v2s32
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
+    ; GFX9-LABEL: name: store_global_v2s32
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+    ; GFX10-LABEL: name: store_global_v2s32
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
+
+...
+---
+
+name: store_global_v3s32
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+
+    ; GFX7-LABEL: name: store_global_v3s32
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
+    ; GFX8-LABEL: name: store_global_v3s32
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
+    ; GFX9-LABEL: name: store_global_v3s32
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
+    ; GFX10-LABEL: name: store_global_v3s32
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
+    G_STORE %1, %0 :: (store 12, align 16, addrspace 1)
+
+...
+---
+
+name: store_global_v4s32
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_global_v4s32
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+    ; GFX8-LABEL: name: store_global_v4s32
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
+    ; GFX9-LABEL: name: store_global_v4s32
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    ; GFX10-LABEL: name: store_global_v4s32
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
+
+...
+
+---
+
+name: store_global_v2s16
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_global_v2s16
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_v2s16
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_v2s16
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_v2s16
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s16>) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
+
+...
+
+---
+
+name: store_global_v4s16
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_global_v4s16
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_v4s16
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_v4s16
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_v4s16
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
+
+...
+
+---
+
+name: store_global_v6s16
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+
+    ; GFX7-LABEL: name: store_global_v6s16
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_v6s16
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_v6s16
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_v6s16
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
+    ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4
+    G_STORE %1, %0 :: (store 12, align 16, addrspace 1)
+
+...
+---
+
+name: store_global_v8s16
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_global_v8s16
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_v8s16
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_v8s16
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_v8s16
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
+
+...
+
+---
+
+name: store_global_v2s64
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_global_v2s64
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_v2s64
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_v2s64
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_v2s64
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
+
+...
+
+---
+
+name: store_global_p1
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_global_p1
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_p1
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_p1
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_p1
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p1) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
+
+...
+
+---
+
+name: store_global_v2p1
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; GFX7-LABEL: name: store_global_v2p1
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_v2p1
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_v2p1
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_v2p1
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
+
+...
+
+---
+
+name: store_global_p3
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_global_p3
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_p3
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_p3
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_p3
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(p3) = COPY $vgpr2
+    G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
+
+...
+
+---
+
+name: store_global_v2p3
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_global_v2p3
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_global_v2p3
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_global_v2p3
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_global_v2p3
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
+
+...
+---
+
+name: store_atomic_global_s32
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_atomic_global_s32
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_atomic_global_s32
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_atomic_global_s32
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_atomic_global_s32
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 1)
+
+...
+
+---
+
+name: store_atomic_global_s64
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GFX7-LABEL: name: store_atomic_global_s64
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX8-LABEL: name: store_atomic_global_s64
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX9-LABEL: name: store_atomic_global_s64
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    ; GFX10-LABEL: name: store_atomic_global_s64
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 1)
+
+...
+
+---
+
+name: store_global_s32_gep_2047
+legalized:       true
+tracksRegLiveness: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1, $vgpr2
+
+    ; GFX7-LABEL: name: store_global_s32_gep_2047
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+    ; GFX8-LABEL: name: store_global_s32_gep_2047
+    ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+    ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1
+    ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
+    ; GFX9-LABEL: name: store_global_s32_gep_2047
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    ; GFX10-LABEL: name: store_global_s32_gep_2047
+    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s64) = G_CONSTANT i64 2047
+    %3:vgpr(p1) = G_GEP %0, %2
+    G_STORE %1, %3 :: (store 4, align 4, addrspace 1)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
index 8689b650b8f25..f35b0b43d3694 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
@@ -12,15 +12,11 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
 ; GCN-NEXT:    s_cbranch_scc0 BB0_2
 ; GCN-NEXT:  ; %bb.1: ; %mid
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
-; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    global_store_dword v[0:1], v0, off
 ; GCN-NEXT:  BB0_2: ; %bb
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
-; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    global_store_dword v[0:1], v0, off
 ; GCN-NEXT:    s_endpgm
 entry:
   %cond = icmp eq i32 %arg0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
index 9e19eefab3b5e..6172c9ceeab98 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
@@ -11,13 +11,11 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) {
 ; GCN-NEXT:    s_cbranch_scc0 BB0_2
 ; GCN-NEXT:  ; %bb.1: ; %mid
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    global_store_dword v[0:1], v0, off
 ; GCN-NEXT:  BB0_2: ; %bb
 ; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    global_store_dword v[0:1], v0, off
 ; GCN-NEXT:    s_endpgm
 entry:
   %cond = icmp eq i32 %arg0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
index 282441a2a1d74..0f259fcb89500 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
@@ -13,9 +13,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
 ; GCN-NEXT:    s_or_b32 s0, s0, s1
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
-; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_waitcnt_vscnt null, 0x0
-; GCN-NEXT:    flat_store_dword v[0:1], v0
+; GCN-NEXT:    global_store_dword v[0:1], v0, off
 ; GCN-NEXT:    s_endpgm
 entry:
   %cond = icmp eq i32 %arg0, 0

From 1d58c1d9d6b88b2b5894aa79dba11328c3791fb1 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 16 Jul 2019 18:46:51 +0000
Subject: [PATCH 272/451] Fix LLDB Windows build Python version logic after
 r366243

llvm-svn: 366247
---
 lldb/cmake/modules/LLDBConfig.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index 26a1c7a72cc08..ad83153a18898 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -139,6 +139,7 @@ function(find_python_libs_windows)
          PYTHONLIBS_VERSION_STRING "${python_version_str}")
     message(STATUS "Found Python version ${PYTHONLIBS_VERSION_STRING}")
     string(REGEX REPLACE "([0-9]+)[.]([0-9]+)[.][0-9]+" "python\\1\\2" PYTHONLIBS_BASE_NAME "${PYTHONLIBS_VERSION_STRING}")
+    set(PYTHONLIBS_VERSION_STRING "${PYTHONLIBS_VERSION_STRING}" PARENT_SCOPE)
     unset(python_version_str)
   else()
     message(WARNING "Unable to find ${PYTHON_INCLUDE_DIR}/patchlevel.h, Python installation is corrupt.")

From 7161fb0be59e56becefb8646583cde912bcbfa5c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 19:22:21 +0000
Subject: [PATCH 273/451] AMDGPU/GlobalISel: Select private loads

llvm-svn: 366248
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td         |    7 +
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |  136 +-
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |    5 +
 .../GlobalISel/inst-select-load-private.mir   | 1158 +++++++++++++++++
 4 files changed, 1305 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 1ccb90b2587ed..13ca1ce4b28fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -57,6 +57,13 @@ def gi_flat_offset_signed :
     GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
     GIComplexPatternEquiv<FLATOffsetSigned>;
 
+def gi_mubuf_scratch_offset :
+    GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
+    GIComplexPatternEquiv<MUBUFScratchOffset>;
+def gi_mubuf_scratch_offen :
+    GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
+    GIComplexPatternEquiv<MUBUFScratchOffen>;
+
 
 class GISelSop2Pat <
   SDPatternOperator node,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 25e72bbe75abf..901a2eaa88295 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -17,10 +17,11 @@
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
-#include "SIMachineFunctionInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -34,6 +35,7 @@
 #define DEBUG_TYPE "amdgpu-isel"
 
 using namespace llvm;
+using namespace MIPatternMatch;
 
 #define GET_GLOBALISEL_IMPL
 #define AMDGPUSubtarget GCNSubtarget
@@ -1594,3 +1596,135 @@ InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
   return selectFlatOffsetImpl<true>(Root);
 }
+
+// FIXME: Implement. Stub: always returns false (sign bit never proven zero).
+static bool signBitIsZero(const MachineOperand &Op,
+                          const MachineRegisterInfo &MRI) {
+  return false;
+}
+
+static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
+  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
+  return PSV && PSV->isStack(); // False if V holds no PseudoSourceValue.
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+
+  int64_t Offset = 0;
+  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) { // Constant address.
+    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+    // TODO: Should this be inside the render function? The iterator seems to
+    // move.
+    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
+            HighBits)
+      .addImm(Offset & ~4095); // Everything above the low 12 bits.
+
+    return {{[=](MachineInstrBuilder &MIB) { // rsrc
+               MIB.addReg(Info->getScratchRSrcReg());
+             },
+             [=](MachineInstrBuilder &MIB) { // vaddr
+               MIB.addReg(HighBits);
+             },
+             [=](MachineInstrBuilder &MIB) { // soffset
+               const MachineMemOperand *MMO = *MI->memoperands_begin();
+               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+               Register SOffsetReg = isStackPtrRelative(PtrInfo)
+                                         ? Info->getStackPtrOffsetReg()
+                                         : Info->getScratchWaveOffsetReg();
+               MIB.addReg(SOffsetReg);
+             },
+             [=](MachineInstrBuilder &MIB) { // offset
+               MIB.addImm(Offset & 4095); // Low 12 bits of the constant.
+             }}};
+  }
+
+  assert(Offset == 0); // Reached only when m_ICst did not match above.
+
+  // Try to fold a frame index directly into the MUBUF vaddr field, and any
+  // offsets.
+  Optional<int> FI;
+  Register VAddr = Root.getReg();
+  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
+    if (isBaseWithConstantOffset(Root, MRI)) {
+      const MachineOperand &LHS = RootDef->getOperand(1);
+      const MachineOperand &RHS = RootDef->getOperand(2);
+      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+      if (LHSDef && RHSDef) {
+        int64_t PossibleOffset =
+            RHSDef->getOperand(1).getCImm()->getSExtValue();
+        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
+            (!STI.privateMemoryResourceIsRangeChecked() ||
+             signBitIsZero(LHS, MRI))) {
+          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
+            FI = LHSDef->getOperand(1).getIndex();
+          else
+            VAddr = LHS.getReg();
+          Offset = PossibleOffset;
+        }
+      }
+    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
+      FI = RootDef->getOperand(1).getIndex();
+    }
+  }
+
+  // If we don't know this private access is a local stack object, it needs to
+  // be relative to the entry point's scratch wave offset register.
+  // TODO: Should split large offsets that don't fit like above.
+  // TODO: Don't use scratch wave offset just because the offset didn't fit.
+  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
+                                   : Info->getScratchWaveOffsetReg();
+
+  return {{[=](MachineInstrBuilder &MIB) { // rsrc
+             MIB.addReg(Info->getScratchRSrcReg());
+           },
+           [=](MachineInstrBuilder &MIB) { // vaddr
+             if (FI.hasValue())
+               MIB.addFrameIndex(FI.getValue());
+             else
+               MIB.addReg(VAddr);
+           },
+           [=](MachineInstrBuilder &MIB) { // soffset
+             MIB.addReg(SOffset);
+           },
+           [=](MachineInstrBuilder &MIB) { // offset
+             MIB.addImm(Offset);
+           }}};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFScratchOffset(
+    MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  int64_t Offset = 0;
+  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
+      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+    return {}; // Not a constant, or not a legal MUBUF immediate offset.
+
+  const MachineFunction *MF = MBB->getParent();
+  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+  const MachineMemOperand *MMO = *MI->memoperands_begin(); // Assumes one exists.
+  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+  Register SOffsetReg = isStackPtrRelative(PtrInfo)
+                            ? Info->getStackPtrOffsetReg()
+                            : Info->getScratchWaveOffsetReg();
+  return {{
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addReg(Info->getScratchRSrcReg());
+      },                                                         // rsrc
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
+  }};
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index e30d745f5cb64..4f489ddfb23db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -128,6 +128,11 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   InstructionSelector::ComplexRendererFns
   selectFlatOffsetSigned(MachineOperand &Root) const;
 
+  InstructionSelector::ComplexRendererFns
+  selectMUBUFScratchOffen(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectMUBUFScratchOffset(MachineOperand &Root) const;
+
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
new file mode 100644
index 0000000000000..e969f457fab0d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
@@ -0,0 +1,1158 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+
+name: load_private_s32_from_4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_4
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_s32_from_2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_2
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_2
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_s32_from_1
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+# <2 x s32> load from private memory (addrspace 5); both run lines expect BUFFER_LOAD_DWORDX2_OFFEN.
+
+name: load_private_v2s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_v2s32
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]]
+    ; GFX9-LABEL: name: load_private_v2s32
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_private_v4s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_v4s32
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]]
+    ; GFX9-LABEL: name: load_private_v4s32
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(<4 x  s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_private_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s64
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    ; GFX9-LABEL: name: load_private_s64
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_private_v2s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_v2s64
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    ; GFX9-LABEL: name: load_private_v2s64
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_private_v2p1
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_v2p1
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX9-LABEL: name: load_private_v2p1
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_private_s128
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s128
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX9-LABEL: name: load_private_s128
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_private_p3_from_4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_p3_from_4
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[LOAD]](p3)
+    ; GFX9-LABEL: name: load_private_p3_from_4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_p5_from_4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_p5_from_4
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p5) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[LOAD]](p5)
+    ; GFX9-LABEL: name: load_private_p5_from_4
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p5) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](p5)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_p999_from_8
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_p999_from_8
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    ; GFX9-LABEL: name: load_private_p999_from_8
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_private_v2p3
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_v2p3
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    ; GFX9-LABEL: name: load_private_v2p3
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
+    $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_private_v2s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_v2s16
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    ; GFX9-LABEL: name: load_private_v2s16
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_v4s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_v4s16
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    ; GFX9-LABEL: name: load_private_v4s16
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 5)
+    $vgpr0_vgpr1 = COPY %1
+
+...
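+# NOTE(review): the load_private_v6s16 case below is commented out and has no FileCheck assertions; presumably <6 x s16> private loads are not handled yet - confirm before re-enabling.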
+# ---
+
+# name: load_private_v6s16
+# legalized:       true
+# regBankSelected: true
+# tracksRegLiveness: true
+# machineFunctionInfo:
+#   scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+#   scratchWaveOffsetReg: $sgpr4
+#   stackPtrOffsetReg: $sgpr32
+
+# body: |
+#   bb.0:
+#     liveins:  $vgpr0
+
+#     %0:vgpr(p5) = COPY $vgpr0
+#     %1:vgpr(<6 x  s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 5)
+#     $vgpr0_vgpr1_vgpr2 = COPY %1
+
+# ...
+
+---
+
+name: load_private_v8s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_v8s16
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
+    ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-LABEL: name: load_private_v8s16
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(<8 x  s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 5)
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+################################################################################
+### Stress addressing modes
+################################################################################
+
+---
+
+name: load_private_s32_from_1_gep_2047
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 2047
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_2048
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_2048
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 2048
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_m2047
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2047
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 -2047
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_m2048
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2048
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 -2048
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_4095
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 4095
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_4096
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 4096
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_m4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4095
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 -4095
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_m4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4096
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 -4096
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_8191
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_8191
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 8191
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_8192
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_8192
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 8192
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_m8191
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8191
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 -8191
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_gep_m8192
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins:  $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8192
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 -8192
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_4_constant_0
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: load_private_s32_from_4_constant_0
+    ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+    ; GFX9-LABEL: name: load_private_s32_from_4_constant_0
+    ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+    %0:vgpr(p5) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_s32_from_4_constant_sgpr_16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16
+    ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+    ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16
+    ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
+    %0:sgpr(p5) = G_CONSTANT i32 16
+    %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_s32_from_1_constant_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
+    %0:vgpr(p5) = G_CONSTANT i32 4095
+    %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_s32_from_1_constant_4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_constant_4096
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = G_CONSTANT i32 4096
+    %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_s32_from_fi
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, size: 4, alignment: 4 }
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: load_private_s32_from_fi
+    ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_fi
+    ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+    %0:vgpr(p5) = G_FRAME_INDEX %stack.0
+    %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_private_s32_from_1_fi_offset_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, size: 4096, alignment: 4 }
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = G_FRAME_INDEX %stack.0
+    %1:vgpr(s32) = G_CONSTANT i32 4095
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_private_s32_from_1_fi_offset_4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, size: 8192, alignment: 4 }
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4096
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(p5) = G_FRAME_INDEX %stack.0
+    %1:vgpr(s32) = G_CONSTANT i32 4096
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %3
+
+...

From 2d10407719683dcfcab0f2b7f33d92cbedd9b876 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 19:27:44 +0000
Subject: [PATCH 274/451] AMDGPU/GlobalISel: Fix selection of private stores

llvm-svn: 366249
---
 llvm/lib/Target/AMDGPU/BUFInstructions.td     |  13 +-
 .../GlobalISel/inst-select-store-private.mir  | 280 ++++++++++++++++++
 2 files changed, 287 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir

diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 5b6c8a7ed96fc..62a19d848af2f 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1568,17 +1568,18 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>;
 
 multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
                                  MUBUF_Pseudo InstrOffset,
-                                 ValueType vt, PatFrag st> {
+                                 ValueType vt, PatFrag st,
+                                 RegisterClass rc = VGPR_32> {
   def : GCNPat <
     (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                       i32:$soffset, u16imm:$offset)),
-    (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
+    (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 
   def : GCNPat <
     (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
                                        u16imm:$offset)),
-    (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
+    (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 }
 
@@ -1587,9 +1588,9 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET
 defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
 defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
 defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
 
 
 let OtherPredicates = [D16PreservesUnusedBits] in {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
new file mode 100644
index 0000000000000..822a1412d168c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
@@ -0,0 +1,280 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs  -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+
+name: store_private_s32_to_4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: store_private_s32_to_4
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+    ; GFX9-LABEL: name: store_private_s32_to_4
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p5) = COPY $vgpr1
+    G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: store_private_s32_to_2
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
+    ; GFX9-LABEL: name: store_private_s32_to_2
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p5) = COPY $vgpr1
+    G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_1
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: store_private_s32_to_1
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+    ; GFX9-LABEL: name: store_private_s32_to_1
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p5) = COPY $vgpr1
+    G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
+
+...
+
+---
+
+name: store_private_v2s16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: store_private_v2s16
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+    ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5)
+    ; GFX9-LABEL: name: store_private_v2s16
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+    ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(p5) = COPY $vgpr1
+    G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
+
+...
+
+---
+
+name: store_private_p3
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: store_private_p3
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+    ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5)
+    ; GFX9-LABEL: name: store_private_p3
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+    ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5)
+    %0:vgpr(p3) = COPY $vgpr0
+    %1:vgpr(p5) = COPY $vgpr1
+    G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
+
+...
+
+---
+
+name: store_private_p5
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: store_private_p5
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+    ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5)
+    ; GFX9-LABEL: name: store_private_p5
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+    ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5)
+    %0:vgpr(p5) = COPY $vgpr0
+    %1:vgpr(p5) = COPY $vgpr1
+    G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_1_fi_offset_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, size: 4096, alignment: 4 }
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: store_private_s32_to_1_fi_offset_4095
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+    ; GFX9-LABEL: name: store_private_s32_to_1_fi_offset_4095
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+    %0:vgpr(p5) = G_FRAME_INDEX %stack.0
+    %1:vgpr(s32) = G_CONSTANT i32 4095
+    %2:vgpr(p5) = G_GEP %0, %1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    G_STORE %3, %2 :: (store 1, align 1, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_1_constant_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, size: 4096, alignment: 4 }
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: store_private_s32_to_1_constant_4095
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+    ; GFX9-LABEL: name: store_private_s32_to_1_constant_4095
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+    %0:vgpr(p5) = G_CONSTANT i32 4095
+    %1:vgpr(s32) = G_CONSTANT i32 0
+    G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_1_constant_4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+stack:
+  - { id: 0, size: 4096, alignment: 4 }
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: store_private_s32_to_1_constant_4096
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+    ; GFX9-LABEL: name: store_private_s32_to_1_constant_4096
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+    %0:vgpr(p5) = G_CONSTANT i32 4096
+    %1:vgpr(s32) = G_CONSTANT i32 0
+    G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
+
+...

From afdf6b3c37e83b78833310be7376ee19f066c554 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 19:44:14 +0000
Subject: [PATCH 275/451] AMDGPU: Fix some missing GCCBuiltin declarations

llvm-svn: 366250
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 56 ++++++++++++++----------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index bad4216173d0a..4c67cdea4d580 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -296,29 +296,33 @@ def int_amdgcn_fract : Intrinsic<
   [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pkrtz : Intrinsic<
-  [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">,
+  Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty],
+            [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pknorm_i16 : Intrinsic<
-  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pknorm_i16 :
+  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">,
+  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+            [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pknorm_u16 : Intrinsic<
-  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pknorm_u16 :
+  GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">,
+  Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+            [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pk_i16 : Intrinsic<
+def int_amdgcn_cvt_pk_i16 :
+    GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">,
+    Intrinsic<
   [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_cvt_pk_u16 : Intrinsic<
-  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_cvt_pk_u16 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">,
+  Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_class : Intrinsic<
@@ -1245,14 +1249,17 @@ def int_amdgcn_ds_swizzle :
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
             [IntrNoMem, IntrConvergent, ImmArg<1>]>;
 
-def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
-  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_ubfe :
+  GCCBuiltin<"__builtin_amdgcn_ubfe">,
+  Intrinsic<[llvm_anyint_ty],
+    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
-  [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, IntrSpeculatable]
+def int_amdgcn_sbfe : GCCBuiltin<"__builtin_amdgcn_sbfe">,
+  Intrinsic<[llvm_anyint_ty],
+    [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
+    [IntrNoMem, IntrSpeculatable]
 >;
 
 def int_amdgcn_lerp :
@@ -1340,13 +1347,14 @@ def int_amdgcn_writelane :
   [IntrNoMem, IntrConvergent]
 >;
 
-def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty],
+def int_amdgcn_alignbit :
+  GCCBuiltin<"__builtin_amdgcn_alignbit">, Intrinsic<[llvm_i32_ty],
   [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty],
-  [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
   [IntrNoMem, IntrSpeculatable]
 >;
 
@@ -1515,13 +1523,13 @@ def int_amdgcn_ds_bpermute :
 //===----------------------------------------------------------------------===//
 
 // llvm.amdgcn.permlane16 <old> <src0> <src1> <src2> <fi> <bound_control>
-def int_amdgcn_permlane16 :
+def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">,
   Intrinsic<[llvm_i32_ty],
             [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
             [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;
 
 // llvm.amdgcn.permlanex16 <old> <src0> <src1> <src2> <fi> <bound_control>
-def int_amdgcn_permlanex16 :
+def int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">,
   Intrinsic<[llvm_i32_ty],
             [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
             [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>;

From ccf22ef94c4a94f7598f51a70445fdec8f8a1bc8 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao@gmail.com>
Date: Tue, 16 Jul 2019 19:59:08 +0000
Subject: [PATCH 276/451] Fix -Wreturn-type warning. NFC.

llvm-svn: 366251
---
 llvm/lib/Remarks/RemarkParser.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp
index 46130d28f72c5..f67464073bd16 100644
--- a/llvm/lib/Remarks/RemarkParser.cpp
+++ b/llvm/lib/Remarks/RemarkParser.cpp
@@ -57,6 +57,7 @@ llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf,
     return createStringError(std::make_error_code(std::errc::invalid_argument),
                              "Unknown remark parser format.");
   }
+  llvm_unreachable("unknown format");
 }
 
 // Wrapper that holds the state needed to interact with the C API.

From 6e0fa292c22cde726b4ddb53cf1fa8c649384030 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Tue, 16 Jul 2019 20:06:00 +0000
Subject: [PATCH 277/451] [AMDGPU] Change register type for v32 vectors

When it is AReg_1024, this results in unnecessary copying of
32-element vectors into AGPRs even though they are not intended
for an mfma instruction.

Differential Revision: https://reviews.llvm.org/D64815

llvm-svn: 366252
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  4 ++--
 llvm/test/CodeGen/AMDGPU/v1024.ll         | 29 +++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/v1024.ll

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a3226577cd02b..db0782e2bf3e5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -152,8 +152,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   }
 
   if (Subtarget->hasMAIInsts()) {
-    addRegisterClass(MVT::v32i32, &AMDGPU::AReg_1024RegClass);
-    addRegisterClass(MVT::v32f32, &AMDGPU::AReg_1024RegClass);
+    addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
+    addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass);
   }
 
   computeRegisterProperties(Subtarget->getRegisterInfo());
diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll
new file mode 100644
index 0000000000000..a5e0454a36344
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/v1024.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Check that we do not use AGPRs for v32i32 type
+
+; GCN-LABEL: {{^}}test_v1024:
+; GCN-NOT: v_accvgpr
+; GCN-COUNT-32: v_mov_b32_e32
+; GCN-NOT: v_accvgpr
+define amdgpu_kernel void @test_v1024() {
+entry:
+  %alloca = alloca <32 x i32>, align 16, addrspace(5)
+  %cast = bitcast <32 x i32> addrspace(5)* %alloca to i8 addrspace(5)*
+  br i1 undef, label %if.then.i.i, label %if.else.i
+
+if.then.i.i:                                      ; preds = %entry
+  call void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* align 16 %cast, i8 addrspace(5)* align 4 undef, i64 128, i1 false)
+  br label %if.then.i62.i
+
+if.else.i:                                        ; preds = %entry
+  br label %if.then.i62.i
+
+if.then.i62.i:                                    ; preds = %if.else.i, %if.then.i.i
+  call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* align 4 undef, i8 addrspace(5)* align 16 %cast, i64 128, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg)
+
+declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg)

From b157dcacb5b96fd64900906911832a8ac3bb189e Mon Sep 17 00:00:00 2001
From: Mitch Phillips <mitchphillips@outlook.com>
Date: Tue, 16 Jul 2019 20:06:17 +0000
Subject: [PATCH 278/451] [GWP-ASan] Add thread ID to PRNG seed.

Summary:
Adds thread ID to PRNG seed for increased entropy. In particular, this allows
multiple runs in quick succession that will have different PRNG seeds, allowing
for better demos/testing.

Reviewers: kcc

Reviewed By: kcc

Subscribers: kubamracek, #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D64453

llvm-svn: 366253
---
 compiler-rt/lib/gwp_asan/guarded_pool_allocator.h | 8 ++++----
 compiler-rt/lib/gwp_asan/random.cpp               | 4 +++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h
index 400d50c0b0b70..28a41110faede 100644
--- a/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h
+++ b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h
@@ -132,6 +132,10 @@ class GuardedPoolAllocator {
   // occur.
   static void reportError(uintptr_t AccessPtr, Error E = Error::UNKNOWN);
 
+  // Get the current thread ID, or kInvalidThreadID if failure. Note: This
+  // implementation is platform-specific.
+  static uint64_t getThreadID();
+
 private:
   static constexpr size_t kInvalidSlotID = SIZE_MAX;
 
@@ -146,10 +150,6 @@ class GuardedPoolAllocator {
   void markReadWrite(void *Ptr, size_t Size) const;
   void markInaccessible(void *Ptr, size_t Size) const;
 
-  // Get the current thread ID, or kInvalidThreadID if failure. Note: This
-  // implementation is platform-specific.
-  static uint64_t getThreadID();
-
   // Get the page size from the platform-specific implementation. Only needs to
   // be called once, and the result should be cached in PageSize in this class.
   static size_t getPlatformPageSize();
diff --git a/compiler-rt/lib/gwp_asan/random.cpp b/compiler-rt/lib/gwp_asan/random.cpp
index 67f4a22ef9525..90493da7e0385 100644
--- a/compiler-rt/lib/gwp_asan/random.cpp
+++ b/compiler-rt/lib/gwp_asan/random.cpp
@@ -7,12 +7,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "gwp_asan/random.h"
+#include "gwp_asan/guarded_pool_allocator.h"
 
 #include <time.h>
 
 namespace gwp_asan {
 uint32_t getRandomUnsigned32() {
-  thread_local uint32_t RandomState = static_cast<uint64_t>(time(nullptr));
+  thread_local uint32_t RandomState =
+      time(nullptr) + GuardedPoolAllocator::getThreadID();
   RandomState ^= RandomState << 13;
   RandomState ^= RandomState >> 17;
   RandomState ^= RandomState << 5;

From 1b69fd275d589f48ce63bea73e311b7ef89c99ba Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 20:15:30 +0000
Subject: [PATCH 279/451] AMDGPU/GlobalISel: Select G_SHL

I think this manages to not break the DAG handling with the divergent
predicates because the standalone divergent patterns end up with a
higher priority than the pattern on the instruction definition.

The 16-bit versions don't work yet.

llvm-svn: 366254
---
 llvm/lib/Target/AMDGPU/SOPInstructions.td     |   4 +-
 llvm/lib/Target/AMDGPU/VOP2Instructions.td    |   2 +-
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |   2 +-
 .../AMDGPU/GlobalISel/inst-select-shl.mir     | 327 ++++++++++++++++++
 .../AMDGPU/GlobalISel/inst-select-shl.s16.mir | 203 +++++++++++
 .../GlobalISel/inst-select-shl.v2s16.mir      | 168 +++++++++
 6 files changed, 702 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir

diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index f46bee126043d..b762b84d9ca7d 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -511,10 +511,10 @@ let AddedComplexity = 1 in {
 let Defs = [SCC] in {
 // TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
 def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
-  [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))]
+  [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
-  [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))]
+  [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
   [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index fa9b913c2de2a..260e8a498fb7b 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -474,7 +474,7 @@ defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
 defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
 defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
 defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
-defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
+defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
 defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
 defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
 defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index f7699e61d59ef..6ebb9557c3c0f 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -393,7 +393,7 @@ def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
 } // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10]
 
 let SubtargetPredicate = isGFX8Plus in {
-def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
+def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
 def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
 def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
 } // End SubtargetPredicate = isGFX8Plus
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir
new file mode 100644
index 0000000000000..34c6c781f64e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir
@@ -0,0 +1,327 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
+
+---
+name: shl_s32_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: shl_s32_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX6: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
+    ; GFX7-LABEL: name: shl_s32_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX7: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX7: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
+    ; GFX8-LABEL: name: shl_s32_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX8: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX8: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
+    ; GFX9-LABEL: name: shl_s32_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
+    ; GFX10-LABEL: name: shl_s32_ss
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX10: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX10: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_s32_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: shl_s32_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX7-LABEL: name: shl_s32_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX8-LABEL: name: shl_s32_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX9-LABEL: name: shl_s32_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX10-LABEL: name: shl_s32_sv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_s32_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: shl_s32_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX7-LABEL: name: shl_s32_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX8-LABEL: name: shl_s32_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX9-LABEL: name: shl_s32_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX10-LABEL: name: shl_s32_vs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: shl_s32_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX6: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX7-LABEL: name: shl_s32_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX7: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX8-LABEL: name: shl_s32_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX8: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX9-LABEL: name: shl_s32_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    ; GFX10-LABEL: name: shl_s32_vv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_s64_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2
+    ; GFX6-LABEL: name: shl_s64_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX6: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    ; GFX7-LABEL: name: shl_s64_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX7: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX7: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    ; GFX8-LABEL: name: shl_s64_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX8: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX8: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    ; GFX9-LABEL: name: shl_s64_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX9: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    ; GFX10-LABEL: name: shl_s64_ss
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX10: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX10: S_ENDPGM 0, implicit [[S_LSHL_B64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:sgpr(s64) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_s64_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX6-LABEL: name: shl_s64_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX7-LABEL: name: shl_s64_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX8-LABEL: name: shl_s64_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX9-LABEL: name: shl_s64_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX10-LABEL: name: shl_s64_sv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s64) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_s64_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0_vgpr1
+    ; GFX6-LABEL: name: shl_s64_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX7-LABEL: name: shl_s64_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX8-LABEL: name: shl_s64_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX9-LABEL: name: shl_s64_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX10-LABEL: name: shl_s64_vs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s64) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6-LABEL: name: shl_s64_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX6: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX7-LABEL: name: shl_s64_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX8-LABEL: name: shl_s64_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX9-LABEL: name: shl_s64_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    ; GFX10-LABEL: name: shl_s64_vv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s64) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
new file mode 100644
index 0000000000000..d41cdee39040f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir
@@ -0,0 +1,203 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX8 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX8,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# ERR-NOT: remark
+# ERR-GFX8: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_ss)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_sv)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_vs)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_vv)
+
+# ERR-GFX910: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_ss)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_sv)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_vs)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_vv)
+
+# ERR-NOT: remark
+
+---
+name: shl_s16_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: shl_s16_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX7-LABEL: name: shl_s16_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX8-LABEL: name: shl_s16_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX9-LABEL: name: shl_s16_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX10-LABEL: name: shl_s16_ss
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:sgpr(s16) = G_SHL %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: shl_s16_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: shl_s16_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX7-LABEL: name: shl_s16_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX8-LABEL: name: shl_s16_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX9-LABEL: name: shl_s16_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX10-LABEL: name: shl_s16_sv
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_SHL %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: shl_s16_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: shl_s16_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX7-LABEL: name: shl_s16_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX8-LABEL: name: shl_s16_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX9-LABEL: name: shl_s16_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX10-LABEL: name: shl_s16_vs
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_SHL %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: shl_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: shl_s16_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX7-LABEL: name: shl_s16_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX8-LABEL: name: shl_s16_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX9-LABEL: name: shl_s16_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16)
+    ; GFX10-LABEL: name: shl_s16_vv
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_SHL %2, %1
+    S_ENDPGM 0, implicit %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir
new file mode 100644
index 0000000000000..ad9b078bcd6fd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir
@@ -0,0 +1,168 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# ERR-NOT: remark
+# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_SHL %0:sgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_ss)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:sgpr, %1:vgpr(<2 x s16>) (in function: shl_v2s16_sv)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:vgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_vs)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:vgpr, %1:vgpr(<2 x s16>) (in function: shl_v2s16_vv)
+# ERR-NOT: remark
+
+---
+name: shl_v2s16_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: shl_v2s16_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX6: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX7-LABEL: name: shl_v2s16_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX7: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX8-LABEL: name: shl_v2s16_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX8: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX9-LABEL: name: shl_v2s16_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX9: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX10-LABEL: name: shl_v2s16_ss
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX10: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr1
+    %2:sgpr(<2 x s16>) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_v2s16_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: shl_v2s16_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX7-LABEL: name: shl_v2s16_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX8-LABEL: name: shl_v2s16_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX9-LABEL: name: shl_v2s16_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX10-LABEL: name: shl_v2s16_sv
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr0
+    %2:vgpr(<2 x s16>) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_v2s16_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: shl_v2s16_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX7-LABEL: name: shl_v2s16_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX8-LABEL: name: shl_v2s16_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX9-LABEL: name: shl_v2s16_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX10-LABEL: name: shl_v2s16_vs
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr0
+    %2:vgpr(<2 x s16>) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: shl_v2s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: shl_v2s16_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX6: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX7-LABEL: name: shl_v2s16_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX7: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX8-LABEL: name: shl_v2s16_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX9-LABEL: name: shl_v2s16_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    ; GFX10-LABEL: name: shl_v2s16_vv
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = G_SHL %0, %1
+    S_ENDPGM 0, implicit %2
+...

From 65e34a3143c48aff8a4200964abc195461f473ac Mon Sep 17 00:00:00 2001
From: Jinsong Ji <jji@us.ibm.com>
Date: Tue, 16 Jul 2019 20:24:33 +0000
Subject: [PATCH 280/451] [PowerPC][HTM] Fix impossible reg-to-reg copy assert
 with ttest builtin

Summary:
This was exposed by our internal testing.
The reduced testcase asserts with "Impossible reg-to-reg copy".

We can't use COPY to do a 32-bit to 64-bit conversion, so the ttest pattern
now uses INSERT_SUBREG into an IMPLICIT_DEF (sub_32) instead.

Reviewers: kbarton, hfinkel, nemanjai

Reviewed By: hfinkel

Subscribers: hiraditya, MaskRay, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64499

llvm-svn: 366255
---
 llvm/lib/Target/PowerPC/PPCInstrHTM.td |  4 +++-
 llvm/test/CodeGen/PowerPC/htm-ttest.ll | 30 ++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/htm-ttest.ll

diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index 1af65fbb7d3bb..104b57a70a2e5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -164,6 +164,8 @@ def : Pat<(int_ppc_tsuspend),
           (TSR 0)>;
 
 def : Pat<(i64 (int_ppc_ttest)),
-          (RLDICL (i64 (COPY (TABORTWCI 0, (LI 0), 0))), 36, 28)>;
+          (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                      (TABORTWCI 0, (LI 0), 0), sub_32)),
+                   36, 28)>;
 
 } // [HasHTM]
diff --git a/llvm/test/CodeGen/PowerPC/htm-ttest.ll b/llvm/test/CodeGen/PowerPC/htm-ttest.ll
new file mode 100644
index 0000000000000..bd9db165f09bf
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/htm-ttest.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \
+; RUN:     -mcpu=pwr8 -mattr=+htm < %s | FileCheck %s
+
+define dso_local void @main() #0 {
+; CHECK-LABEL: main:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    tabortwci. 0, 3, 0
+; CHECK-NEXT:    mfocrf 3, 128
+; CHECK-NEXT:    rldicl 3, 3, 36, 28
+; CHECK-NEXT:    rlwinm. 3, 3, 31, 30, 31
+; CHECK-NEXT:    beqlr+ 0
+; CHECK-NEXT:  # %bb.1:
+  %1 = call i64 @llvm.ppc.ttest() #1
+  %2 = lshr i64 %1, 1
+  %3 = and i64 %2, 3
+  %4 = icmp eq i64 %3, 0
+  br i1 %4, label %5, label %6
+
+5:                                                ; preds = %0
+  ret void
+
+6:                                                ; preds = %0
+  unreachable
+}
+
+; Function Attrs: nounwind
+declare i64 @llvm.ppc.ttest() #1
+

From e5b28b98e997f9b19ace6bcb95f6298b15b82cd4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 20:25:43 +0000
Subject: [PATCH 281/451] AMDGPU/GlobalISel: Select G_LSHR

llvm-svn: 366256
---
 llvm/lib/Target/AMDGPU/SOPInstructions.td     |   4 +-
 llvm/lib/Target/AMDGPU/VOP2Instructions.td    |   2 +-
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |   2 +-
 .../AMDGPU/GlobalISel/inst-select-lshr.mir    | 327 ++++++++++++++++++
 .../GlobalISel/inst-select-lshr.s16.mir       | 203 +++++++++++
 .../GlobalISel/inst-select-lshr.v2s16.mir     | 169 +++++++++
 6 files changed, 703 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir

diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index b762b84d9ca7d..61a720bdaadf1 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -517,10 +517,10 @@ def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
   [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
-  [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
+  [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
-  [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))]
+  [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
   [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 260e8a498fb7b..9a5080f12684e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -472,7 +472,7 @@ defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
 defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
 defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
 defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
-defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
+defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
 defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
 defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
 defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 6ebb9557c3c0f..26beb347fc43f 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -394,7 +394,7 @@ def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
 
 let SubtargetPredicate = isGFX8Plus in {
 def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
-def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
+def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>;
 def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
 } // End SubtargetPredicate = isGFX8Plus
 } // End SchedRW = [Write64Bit]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir
new file mode 100644
index 0000000000000..9e80c266c49b3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir
@@ -0,0 +1,327 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
+
+---
+name: lshr_s32_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: lshr_s32_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX6: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_LSHR_B32_]]
+    ; GFX7-LABEL: name: lshr_s32_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX7: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX7: S_ENDPGM 0, implicit [[S_LSHR_B32_]]
+    ; GFX8-LABEL: name: lshr_s32_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX8: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX8: S_ENDPGM 0, implicit [[S_LSHR_B32_]]
+    ; GFX9-LABEL: name: lshr_s32_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_LSHR_B32_]]
+    ; GFX10-LABEL: name: lshr_s32_ss
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX10: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX10: S_ENDPGM 0, implicit [[S_LSHR_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_s32_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: lshr_s32_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX7-LABEL: name: lshr_s32_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX8-LABEL: name: lshr_s32_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX9-LABEL: name: lshr_s32_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX10-LABEL: name: lshr_s32_sv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_s32_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: lshr_s32_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX7-LABEL: name: lshr_s32_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX8-LABEL: name: lshr_s32_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX9-LABEL: name: lshr_s32_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX10-LABEL: name: lshr_s32_vs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_s32_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: lshr_s32_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX6: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX7-LABEL: name: lshr_s32_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX7: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX8-LABEL: name: lshr_s32_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX8: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX9-LABEL: name: lshr_s32_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    ; GFX10-LABEL: name: lshr_s32_vv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_s64_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2
+    ; GFX6-LABEL: name: lshr_s64_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX6: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_LSHR_B64_]]
+    ; GFX7-LABEL: name: lshr_s64_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX7: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX7: S_ENDPGM 0, implicit [[S_LSHR_B64_]]
+    ; GFX8-LABEL: name: lshr_s64_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX8: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX8: S_ENDPGM 0, implicit [[S_LSHR_B64_]]
+    ; GFX9-LABEL: name: lshr_s64_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX9: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_LSHR_B64_]]
+    ; GFX10-LABEL: name: lshr_s64_ss
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX10: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX10: S_ENDPGM 0, implicit [[S_LSHR_B64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:sgpr(s64) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_s64_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX6-LABEL: name: lshr_s64_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX7-LABEL: name: lshr_s64_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX8-LABEL: name: lshr_s64_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX9-LABEL: name: lshr_s64_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX10-LABEL: name: lshr_s64_sv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s64) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_s64_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0_vgpr1
+    ; GFX6-LABEL: name: lshr_s64_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX7-LABEL: name: lshr_s64_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX8-LABEL: name: lshr_s64_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX9-LABEL: name: lshr_s64_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX10-LABEL: name: lshr_s64_vs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s64) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_s64_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6-LABEL: name: lshr_s64_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX6: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX7-LABEL: name: lshr_s64_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX8-LABEL: name: lshr_s64_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX9-LABEL: name: lshr_s64_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    ; GFX10-LABEL: name: lshr_s64_vv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s64) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
new file mode 100644
index 0000000000000..2a2f600c5b7c6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir
@@ -0,0 +1,203 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX8 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX8,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# ERR-NOT: remark
+# ERR-GFX8: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_ss)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_sv)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_vs)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_vv)
+
+# ERR-GFX910: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_ss)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_sv)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_vs)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_vv)
+
+# ERR-NOT: remark
+
+---
+name: lshr_s16_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: lshr_s16_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX7-LABEL: name: lshr_s16_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX8-LABEL: name: lshr_s16_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX9-LABEL: name: lshr_s16_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX10-LABEL: name: lshr_s16_ss
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:sgpr(s16) = G_LSHR %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: lshr_s16_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: lshr_s16_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX7-LABEL: name: lshr_s16_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX8-LABEL: name: lshr_s16_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX9-LABEL: name: lshr_s16_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX10-LABEL: name: lshr_s16_sv
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_LSHR %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: lshr_s16_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: lshr_s16_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX7-LABEL: name: lshr_s16_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX8-LABEL: name: lshr_s16_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX9-LABEL: name: lshr_s16_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX10-LABEL: name: lshr_s16_vs
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_LSHR %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: lshr_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: lshr_s16_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX7-LABEL: name: lshr_s16_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX8-LABEL: name: lshr_s16_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX9-LABEL: name: lshr_s16_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16)
+    ; GFX10-LABEL: name: lshr_s16_vv
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_LSHR %2, %1
+    S_ENDPGM 0, implicit %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir
new file mode 100644
index 0000000000000..35724e0b4d8ec
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir
@@ -0,0 +1,169 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# ERR-NOT: remark
+# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_ss)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:vgpr(<2 x s16>) (in function: lshr_v2s16_sv)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:vgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_vs)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:vgpr, %1:vgpr(<2 x s16>) (in function: lshr_v2s16_vv)
+# ERR-NOT: remark
+
+---
+name: lshr_v2s16_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: lshr_v2s16_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX6: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX7-LABEL: name: lshr_v2s16_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX7: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX8-LABEL: name: lshr_v2s16_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX8: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX9-LABEL: name: lshr_v2s16_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX9: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX10-LABEL: name: lshr_v2s16_ss
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX10: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr1
+    %2:sgpr(<2 x s16>) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_v2s16_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: lshr_v2s16_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX7-LABEL: name: lshr_v2s16_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX8-LABEL: name: lshr_v2s16_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX9-LABEL: name: lshr_v2s16_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX10-LABEL: name: lshr_v2s16_sv
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr0
+    %2:vgpr(<2 x s16>) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_v2s16_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: lshr_v2s16_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX7-LABEL: name: lshr_v2s16_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX8-LABEL: name: lshr_v2s16_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX9-LABEL: name: lshr_v2s16_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX10-LABEL: name: lshr_v2s16_vs
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr0
+    %2:vgpr(<2 x s16>) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: lshr_v2s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: lshr_v2s16_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX6: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX7-LABEL: name: lshr_v2s16_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX7: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX8-LABEL: name: lshr_v2s16_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX9-LABEL: name: lshr_v2s16_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    ; GFX10-LABEL: name: lshr_v2s16_vv
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = G_LSHR %0, %1
+    S_ENDPGM 0, implicit %2
+...

From f8c8284455b11ae99aa90322b40ab5ae18d2a1b5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 20:31:25 +0000
Subject: [PATCH 282/451] AMDGPU/GlobalISel: Select G_ASHR

llvm-svn: 366257
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td         |   9 -
 llvm/lib/Target/AMDGPU/SOPInstructions.td     |   4 +-
 llvm/lib/Target/AMDGPU/VOP2Instructions.td    |   2 +-
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |   2 +-
 .../AMDGPU/GlobalISel/inst-select-ashr.mir    | 363 +++++++++++++++---
 .../GlobalISel/inst-select-ashr.s16.mir       | 203 ++++++++++
 .../GlobalISel/inst-select-ashr.v2s16.mir     | 169 ++++++++
 7 files changed, 680 insertions(+), 72 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 13ca1ce4b28fe..cad4c2ef404c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -143,15 +143,6 @@ multiclass GISelVop2IntrPat <
 def : GISelSop2Pat <or, S_OR_B32, i32>;
 def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
 
-def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
-let AddedComplexity = 100 in {
-let SubtargetPredicate = isGFX6GFX7 in {
-def : GISelVop2Pat <sra, V_ASHR_I32_e32, i32>;
-}
-def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
-}
-def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
-
 // FIXME: We can't re-use SelectionDAG patterns here because they match
 // against a custom SDNode and we would need to create a generic machine
 // instruction that is equivalent to the custom SDNode.  This would also require
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 61a720bdaadf1..dfafdccc05a3f 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -523,10 +523,10 @@ def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
   [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
-  [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
+  [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
 >;
 def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
-  [(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))]
+  [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
 >;
 } // End Defs = [SCC]
 
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 9a5080f12684e..1b30cd2ed5165 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -473,7 +473,7 @@ defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
 defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
 defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
 defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
-defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
+defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">;
 defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
 defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
 defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 26beb347fc43f..21dbef9240e10 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -395,7 +395,7 @@ def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
 let SubtargetPredicate = isGFX8Plus in {
 def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
 def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>;
-def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
+def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
 } // End SubtargetPredicate = isGFX8Plus
 } // End SchedRW = [Write64Bit]
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir
index 3209f4fb808f1..f6176692cefc6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir
@@ -1,82 +1,327 @@
-# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
 
 ---
-
-name:            ashr
-legalized:       true
+name: ashr_s32_ss
+legalized: true
 regBankSelected: true
 
-# GCN-LABEL: name: ashr
 body: |
   bb.0:
-    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
-    ; GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-    ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: ashr_s32_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX6: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_ASHR_I32_]]
+    ; GFX7-LABEL: name: ashr_s32_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX7: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX7: S_ENDPGM 0, implicit [[S_ASHR_I32_]]
+    ; GFX8-LABEL: name: ashr_s32_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX8: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX8: S_ENDPGM 0, implicit [[S_ASHR_I32_]]
+    ; GFX9-LABEL: name: ashr_s32_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX9: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_ASHR_I32_]]
+    ; GFX10-LABEL: name: ashr_s32_ss
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GFX10: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX10: S_ENDPGM 0, implicit [[S_ASHR_I32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s32) = COPY $sgpr1
-    %2:vgpr(s32) = COPY $vgpr0
-    %3:vgpr(p1) = COPY $vgpr3_vgpr4
-
-    ; GCN: [[C1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
-    ; GCN: [[C4096:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
-    %4:sgpr(s32) = G_CONSTANT i32 1
-    %5:sgpr(s32) = G_CONSTANT i32 4096
-
-    ; ashr ss
-    ; GCN: [[SS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SGPR0]], [[SGPR1]]
-    %6:sgpr(s32) = G_ASHR %0, %1
+    %2:sgpr(s32) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
 
-    ; ashr si
-    ; GCN: [[SI:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SS]], [[C1]]
-    %7:sgpr(s32) = G_ASHR %6, %4
+---
+name: ashr_s32_sv
+legalized: true
+regBankSelected: true
 
-    ; ashr is
-    ; GCN: [[IS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[C1]], [[SI]]
-    %8:sgpr(s32) = G_ASHR %4, %7
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: ashr_s32_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX7-LABEL: name: ashr_s32_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX8-LABEL: name: ashr_s32_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX9-LABEL: name: ashr_s32_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX10-LABEL: name: ashr_s32_sv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
 
-    ; ashr sc
-    ; GCN: [[SC:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[IS]], [[C4096]]
-    %9:sgpr(s32) = G_ASHR %8, %5
+---
+name: ashr_s32_vs
+legalized: true
+regBankSelected: true
 
-    ; ashr cs
-    ; GCN: [[CS:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[C4096]], [[SC]]
-    %10:sgpr(s32) = G_ASHR %5, %9
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: ashr_s32_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX7-LABEL: name: ashr_s32_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX8-LABEL: name: ashr_s32_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX9-LABEL: name: ashr_s32_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX10-LABEL: name: ashr_s32_vs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
 
-    ; ashr vs
-    ; GCN: [[VS:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[CS]], [[VGPR0]]
-    %11:vgpr(s32) = G_ASHR %2, %10
+---
+name: ashr_s32_vv
+legalized: true
+regBankSelected: true
 
-    ; ashr sv
-    ; SI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[CS]], [[VS]]
-    ; VI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VS]], [[CS]]
-    %12:vgpr(s32) = G_ASHR %10, %11
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: ashr_s32_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX6: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX7-LABEL: name: ashr_s32_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX7: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX8-LABEL: name: ashr_s32_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX8: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX9-LABEL: name: ashr_s32_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX9: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    ; GFX10-LABEL: name: ashr_s32_vv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GFX10: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
 
-    ; ashr vv
-    ; SI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[SV]], [[VGPR0]]
-    ; VI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[VGPR0]], [[SV]]
-    %13:vgpr(s32) = G_ASHR %12, %2
+---
+name: ashr_s64_ss
+legalized: true
+regBankSelected: true
 
-    ; ashr iv
-    ; SI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C1]], [[VV]]
-    ; VI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VV]], [[C1]]
-    %14:vgpr(s32) = G_ASHR %4, %13
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2
+    ; GFX6-LABEL: name: ashr_s64_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX6: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
+    ; GFX7-LABEL: name: ashr_s64_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX7: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX7: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
+    ; GFX8-LABEL: name: ashr_s64_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX8: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX8: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
+    ; GFX9-LABEL: name: ashr_s64_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX9: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
+    ; GFX10-LABEL: name: ashr_s64_ss
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GFX10: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GFX10: S_ENDPGM 0, implicit [[S_ASHR_I64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:sgpr(s64) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
 
-    ; ashr vi
-    ; GCN: [[VI:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C1]], [[IV]]
-    %15:vgpr(s32) = G_ASHR %14, %4
+---
+name: ashr_s64_sv
+legalized: true
+regBankSelected: true
 
-    ; ashr cv
-    ; SI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C4096]], [[VI]]
-    ; VI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VI]], [[C4096]]
-    %16:vgpr(s32) = G_ASHR %5, %15
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $vgpr0
+    ; GFX6-LABEL: name: ashr_s64_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX7-LABEL: name: ashr_s64_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX8-LABEL: name: ashr_s64_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX8: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX9-LABEL: name: ashr_s64_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX10-LABEL: name: ashr_s64_sv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s64) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
 
-    ; ashr vc
-    ; GCN: [[VC:%[-1-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C4096]], [[CV]]
-    %17:vgpr(s32) = G_ASHR %16, %5
+---
+name: ashr_s64_vs
+legalized: true
+regBankSelected: true
 
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0_vgpr1
+    ; GFX6-LABEL: name: ashr_s64_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX6: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX7-LABEL: name: ashr_s64_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX7: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX8-LABEL: name: ashr_s64_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX8: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX9-LABEL: name: ashr_s64_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX9: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX10-LABEL: name: ashr_s64_vs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
+    ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s64) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
 
-    S_ENDPGM 0, implicit %17
+---
+name: ashr_s64_vv
+legalized: true
+regBankSelected: true
 
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2
+    ; GFX6-LABEL: name: ashr_s64_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX6: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX7-LABEL: name: ashr_s64_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX8-LABEL: name: ashr_s64_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX8: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX9-LABEL: name: ashr_s64_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    ; GFX10-LABEL: name: ashr_s64_vv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]]
+    %0:vgpr(s64) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s64) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
 ...
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
new file mode 100644
index 0000000000000..1a90e609f7bd3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir
@@ -0,0 +1,203 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX8 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX8,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# ERR-NOT: remark
+# ERR-GFX8: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_ss)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_sv)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_vs)
+# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_vv)
+
+# ERR-GFX910: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_ss)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_sv)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_vs)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_vv)
+
+# ERR-NOT: remark
+
+---
+name: ashr_s16_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: ashr_s16_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX7-LABEL: name: ashr_s16_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX8-LABEL: name: ashr_s16_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX9-LABEL: name: ashr_s16_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX10-LABEL: name: ashr_s16_ss
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+    ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:sgpr(s16) = G_ASHR %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: ashr_s16_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: ashr_s16_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX7-LABEL: name: ashr_s16_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX8-LABEL: name: ashr_s16_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX9-LABEL: name: ashr_s16_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX10-LABEL: name: ashr_s16_sv
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16)
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:sgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_ASHR %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: ashr_s16_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: ashr_s16_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX7-LABEL: name: ashr_s16_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX8-LABEL: name: ashr_s16_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX9-LABEL: name: ashr_s16_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX10-LABEL: name: ashr_s16_vs
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_ASHR %2, %1
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: ashr_s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: ashr_s16_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX7-LABEL: name: ashr_s16_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX8-LABEL: name: ashr_s16_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX9-LABEL: name: ashr_s16_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16)
+    ; GFX10-LABEL: name: ashr_s16_vv
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+    ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+    ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32)
+    ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s16) = G_TRUNC %0
+    %3:vgpr(s16) = G_ASHR %2, %1
+    S_ENDPGM 0, implicit %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir
new file mode 100644
index 0000000000000..20602f748254f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir
@@ -0,0 +1,169 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s
+# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t
+
+# ERR-NOT: remark
+# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_ss)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:vgpr(<2 x s16>) (in function: ashr_v2s16_sv)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:vgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_vs)
+# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:vgpr, %1:vgpr(<2 x s16>) (in function: ashr_v2s16_vv)
+# ERR-NOT: remark
+
+---
+name: ashr_v2s16_ss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; GFX6-LABEL: name: ashr_v2s16_ss
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX6: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX7-LABEL: name: ashr_v2s16_ss
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX7: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX8-LABEL: name: ashr_v2s16_ss
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX8: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX9-LABEL: name: ashr_v2s16_ss
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX9: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX10-LABEL: name: ashr_v2s16_ss
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+    ; GFX10: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr1
+    %2:sgpr(<2 x s16>) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: ashr_v2s16_sv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: ashr_v2s16_sv
+    ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX7-LABEL: name: ashr_v2s16_sv
+    ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX8-LABEL: name: ashr_v2s16_sv
+    ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX9-LABEL: name: ashr_v2s16_sv
+    ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX10-LABEL: name: ashr_v2s16_sv
+    ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    %0:sgpr(<2 x s16>) = COPY $sgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr0
+    %2:vgpr(<2 x s16>) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: ashr_v2s16_vs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+    ; GFX6-LABEL: name: ashr_v2s16_vs
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX6: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX7-LABEL: name: ashr_v2s16_vs
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX7: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX8-LABEL: name: ashr_v2s16_vs
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX9-LABEL: name: ashr_v2s16_vs
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX10-LABEL: name: ashr_v2s16_vs
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+    ; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:sgpr(<2 x s16>) = COPY $sgpr0
+    %2:vgpr(<2 x s16>) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...
+
+---
+name: ashr_v2s16_vv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    ; GFX6-LABEL: name: ashr_v2s16_vv
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX6: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX6: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX7-LABEL: name: ashr_v2s16_vv
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX7: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX7: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX8-LABEL: name: ashr_v2s16_vv
+    ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX9-LABEL: name: ashr_v2s16_vv
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    ; GFX10-LABEL: name: ashr_v2s16_vv
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+    ; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>)
+    ; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>)
+    %0:vgpr(<2 x s16>) = COPY $vgpr0
+    %1:vgpr(<2 x s16>) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = G_ASHR %0, %1
+    S_ENDPGM 0, implicit %2
+...

From d069a1921065bda2c6379bb7b6dfcff1572a285e Mon Sep 17 00:00:00 2001
From: Julian Lettner <jlettner@apple.com>
Date: Tue, 16 Jul 2019 20:41:04 +0000
Subject: [PATCH 283/451] [TSan] Improve handling of stack pointer mangling in
 {set,long}jmp, pt.6

Clean up the ARM64 assembly after the previous commit removed the unnecessary
computation of the mangled stack pointer.

llvm-svn: 366258
---
 compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S | 89 ++++++++++-----------
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
index c35897d3c3624..e0b4c71dfed9a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S
@@ -39,7 +39,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
 ASM_SYMBOL_INTERCEPTOR(setjmp):
   CFI_STARTPROC
 
-  // save env parameters for function call
+  // Save frame/link register
   stp     x29, x30, [sp, -32]!
   CFI_DEF_CFA_OFFSET (32)
   CFI_OFFSET (29, -32)
@@ -49,10 +49,9 @@ ASM_SYMBOL_INTERCEPTOR(setjmp):
   add     x29, sp, 0
   CFI_DEF_CFA_REGISTER (29)
 
-  // Save jmp_buf
-  str     x19, [sp, 16]
-  CFI_OFFSET (19, -16)
-  mov     x19, x0
+  // Save env parameter
+  str     x0, [sp, 16]
+  CFI_OFFSET (0, -16)
 
   // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
   add     x0, x29, 32
@@ -60,12 +59,14 @@ ASM_SYMBOL_INTERCEPTOR(setjmp):
   // call tsan interceptor
   bl      ASM_SYMBOL(__tsan_setjmp)
 
-  // restore env parameter
-  mov     x0, x19
-  ldr     x19, [sp, 16]
+  // Restore env parameter
+  ldr     x0, [sp, 16]
+  CFI_RESTORE (0)
+
+  // Restore frame/link register
   ldp     x29, x30, [sp], 32
+  CFI_RESTORE (29)
   CFI_RESTORE (30)
-  CFI_RESTORE (19)
   CFI_DEF_CFA (31, 0)
 
   // tail jump to libc setjmp
@@ -89,7 +90,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
 ASM_SYMBOL_INTERCEPTOR(_setjmp):
   CFI_STARTPROC
 
-  // save env parameters for function call
+  // Save frame/link register
   stp     x29, x30, [sp, -32]!
   CFI_DEF_CFA_OFFSET (32)
   CFI_OFFSET (29, -32)
@@ -99,10 +100,9 @@ ASM_SYMBOL_INTERCEPTOR(_setjmp):
   add     x29, sp, 0
   CFI_DEF_CFA_REGISTER (29)
 
-  // Save jmp_buf
-  str     x19, [sp, 16]
-  CFI_OFFSET (19, -16)
-  mov     x19, x0
+  // Save env parameter
+  str     x0, [sp, 16]
+  CFI_OFFSET (0, -16)
 
   // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
   add     x0, x29, 32
@@ -110,12 +110,14 @@ ASM_SYMBOL_INTERCEPTOR(_setjmp):
   // call tsan interceptor
   bl      ASM_SYMBOL(__tsan_setjmp)
 
-  // Restore jmp_buf parameter
-  mov     x0, x19
-  ldr     x19, [sp, 16]
+  // Restore env parameter
+  ldr     x0, [sp, 16]
+  CFI_RESTORE (0)
+
+  // Restore frame/link register
   ldp     x29, x30, [sp], 32
+  CFI_RESTORE (29)
   CFI_RESTORE (30)
-  CFI_RESTORE (19)
   CFI_DEF_CFA (31, 0)
 
   // tail jump to libc setjmp
@@ -139,7 +141,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
 ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
   CFI_STARTPROC
 
-  // save env parameters for function call
+  // Save frame/link register
   stp     x29, x30, [sp, -32]!
   CFI_DEF_CFA_OFFSET (32)
   CFI_OFFSET (29, -32)
@@ -149,12 +151,10 @@ ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
   add     x29, sp, 0
   CFI_DEF_CFA_REGISTER (29)
 
-  // Save jmp_buf and savesigs
-  stp     x19, x20, [sp, 16]
-  CFI_OFFSET (19, -16)
-  CFI_OFFSET (20, -8)
-  mov     w20, w1
-  mov     x19, x0
+  // Save env and savesigs parameter
+  stp     x0, x1, [sp, 16]
+  CFI_OFFSET (0, -16)
+  CFI_OFFSET (1, -8)
 
   // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
   add     x0, x29, 32
@@ -162,15 +162,15 @@ ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
   // call tsan interceptor
   bl      ASM_SYMBOL(__tsan_setjmp)
 
-  // restore env parameter
-  mov     w1, w20
-  mov     x0, x19
-  ldp     x19, x20, [sp, 16]
+  // Restore env and savesigs parameter
+  ldp     x0, x1, [sp, 16]
+  CFI_RESTORE (0)
+  CFI_RESTORE (1)
+
+  // Restore frame/link register
   ldp     x29, x30, [sp], 32
-  CFI_RESTORE (30)
   CFI_RESTORE (29)
-  CFI_RESTORE (19)
-  CFI_RESTORE (20)
+  CFI_RESTORE (30)
   CFI_DEF_CFA (31, 0)
 
   // tail jump to libc sigsetjmp
@@ -194,7 +194,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
 ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
   CFI_STARTPROC
 
-  // save env parameters for function call
+  // Save frame/link register
   stp     x29, x30, [sp, -32]!
   CFI_DEF_CFA_OFFSET (32)
   CFI_OFFSET (29, -32)
@@ -204,12 +204,10 @@ ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
   add     x29, sp, 0
   CFI_DEF_CFA_REGISTER (29)
 
-  // Save jmp_buf and savesigs
-  stp     x19, x20, [sp, 16]
-  CFI_OFFSET (19, -16)
-  CFI_OFFSET (20, -8)
-  mov     w20, w1
-  mov     x19, x0
+  // Save env and savesigs parameter
+  stp     x0, x1, [sp, 16]
+  CFI_OFFSET (0, -16)
+  CFI_OFFSET (1, -8)
 
   // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
   add     x0, x29, 32
@@ -217,14 +215,15 @@ ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
   // call tsan interceptor
   bl      ASM_SYMBOL(__tsan_setjmp)
 
-  mov     w1, w20
-  mov     x0, x19
-  ldp     x19, x20, [sp, 16]
+  // Restore env and savesigs parameter
+  ldp     x0, x1, [sp, 16]
+  CFI_RESTORE (0)
+  CFI_RESTORE (1)
+
+  // Restore frame/link register
   ldp     x29, x30, [sp], 32
-  CFI_RESTORE (30)
   CFI_RESTORE (29)
-  CFI_RESTORE (19)
-  CFI_RESTORE (20)
+  CFI_RESTORE (30)
   CFI_DEF_CFA (31, 0)
 
   // tail jump to libc __sigsetjmp

From 12154ee8f1ee170bac97ef238dece50d504de08a Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Tue, 16 Jul 2019 20:41:33 +0000
Subject: [PATCH 284/451] [libc++] Add missing UNSUPPORTED for CTAD tests

The tests for unordered_set and unordered_multiset were missing UNSUPPORTED
markup for Apple Clang 9.1, which is still being used on some CI bots.

llvm-svn: 366259
---
 .../unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp    | 1 +
 .../containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp
index 7becaf96009a4..683d201976414 100644
--- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp
@@ -9,6 +9,7 @@
 // <unordered_set>
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 // UNSUPPORTED: libcpp-no-deduction-guides
+// UNSUPPORTED: apple-clang-9.1
 
 // template<class InputIterator,
 //        class Hash = hash<iter-value-type<InputIterator>>,
diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp
index 20799823692a5..95bc08293f537 100644
--- a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp
@@ -9,6 +9,7 @@
 // <unordered_set>
 // UNSUPPORTED: c++98, c++03, c++11, c++14
 // UNSUPPORTED: libcpp-no-deduction-guides
+// UNSUPPORTED: apple-clang-9.1
 
 // template<class InputIterator,
 //        class Hash = hash<iter-value-type<InputIterator>>,

From ddbb83732afc2e392eca5873910b5424a069e656 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <adamski.dominik@gmail.com>
Date: Tue, 16 Jul 2019 20:51:04 +0000
Subject: [PATCH 285/451] [NFC][ScopBuilder] Move addRecordedAssumptions to
 ScopBuilder

Scope of changes:
1) Moved addRecordedAssumptions to ScopBuilder.
2) Moved Assumption struct outside Scop class.
3) Refactored addRecordedAssumptions function. Replaced the while loop with
a range-based for loop.
4) Added function to clear processed Assumptions.

Differential Revision: https://reviews.llvm.org/D63572

llvm-svn: 366260
---
 polly/include/polly/ScopBuilder.h  |  3 ++
 polly/include/polly/ScopInfo.h     | 51 +++++++++++++++++-------------
 polly/lib/Analysis/ScopBuilder.cpp | 36 ++++++++++++++++++++-
 polly/lib/Analysis/ScopInfo.cpp    | 33 -------------------
 4 files changed, 67 insertions(+), 56 deletions(-)

diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h
index bc0007009c6e0..9cfdf7b3cbf44 100644
--- a/polly/include/polly/ScopBuilder.h
+++ b/polly/include/polly/ScopBuilder.h
@@ -327,6 +327,9 @@ class ScopBuilder {
                       BasicBlock *IncomingBlock, Value *IncomingValue,
                       bool IsExitBlock);
 
+  /// Add all recorded assumptions to the assumed context.
+  void addRecordedAssumptions();
+
   /// Create a MemoryAccess for reading the value of a phi.
   ///
   /// The modeling assumes that all incoming blocks write their incoming value
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index aeed4a7e3d4f3..30e8d0346ad58 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -1624,6 +1624,24 @@ class ScopStmt {
 /// Print ScopStmt S to raw_ostream OS.
 raw_ostream &operator<<(raw_ostream &OS, const ScopStmt &S);
 
+/// Helper struct to remember assumptions.
+struct Assumption {
+  /// The kind of the assumption (e.g., WRAPPING).
+  AssumptionKind Kind;
+
+  /// Flag to distinguish assumptions and restrictions.
+  AssumptionSign Sign;
+
+  /// The valid/invalid context if this is an assumption/restriction.
+  isl::set Set;
+
+  /// The location that caused this assumption.
+  DebugLoc Loc;
+
+  /// An optional block whose domain can simplify the assumption.
+  BasicBlock *BB;
+};
+
 /// Static Control Part
 ///
 /// A Scop is the polyhedral representation of a control flow region detected
@@ -1782,24 +1800,7 @@ class Scop {
   /// need to be "false". Otherwise they behave the same.
   isl::set InvalidContext;
 
-  /// Helper struct to remember assumptions.
-  struct Assumption {
-    /// The kind of the assumption (e.g., WRAPPING).
-    AssumptionKind Kind;
-
-    /// Flag to distinguish assumptions and restrictions.
-    AssumptionSign Sign;
-
-    /// The valid/invalid context if this is an assumption/restriction.
-    isl::set Set;
-
-    /// The location that caused this assumption.
-    DebugLoc Loc;
-
-    /// An optional block whose domain can simplify the assumption.
-    BasicBlock *BB;
-  };
-
+  using RecordedAssumptionsTy = SmallVector<Assumption, 8>;
   /// Collection to hold taken assumptions.
   ///
   /// There are two reasons why we want to record assumptions first before we
@@ -1810,7 +1811,7 @@ class Scop {
   ///      construction (basically after we know all parameters), thus the user
   ///      might see overly complicated assumptions to be taken while they will
   ///      only be simplified later on.
-  SmallVector<Assumption, 8> RecordedAssumptions;
+  RecordedAssumptionsTy RecordedAssumptions;
 
   /// The schedule of the SCoP
   ///
@@ -2338,6 +2339,12 @@ class Scop {
                       InvariantEquivClasses.end());
   }
 
+  /// Return an iterator range containing hold assumptions.
+  iterator_range<RecordedAssumptionsTy::const_iterator>
+  recorded_assumptions() const {
+    return make_range(RecordedAssumptions.begin(), RecordedAssumptions.end());
+  }
+
   /// Return whether this scop is empty, i.e. contains no statements that
   /// could be executed.
   bool isEmpty() const { return Stmts.empty(); }
@@ -2494,6 +2501,9 @@ class Scop {
   /// @returns True if the optimized SCoP can be executed.
   bool hasFeasibleRuntimeContext() const;
 
+  /// Clear assumptions which have been already processed.
+  void clearRecordedAssumptions() { return RecordedAssumptions.clear(); }
+
   /// Check if the assumption in @p Set is trivial or not.
   ///
   /// @param Set  The relations between parameters that are assumed to hold.
@@ -2559,9 +2569,6 @@ class Scop {
   void recordAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
                         AssumptionSign Sign, BasicBlock *BB = nullptr);
 
-  /// Add all recorded assumptions to the assumed context.
-  void addRecordedAssumptions();
-
   /// Mark the scop as invalid.
   ///
   /// This method adds an assumption to the scop that is always invalid. As a
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index 8ae29e223a7eb..617bc81908273 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -385,6 +385,40 @@ Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) {
   return Descriptor;
 }
 
+void ScopBuilder::addRecordedAssumptions() {
+  for (auto &AS : llvm::reverse(scop->recorded_assumptions())) {
+
+    if (!AS.BB) {
+      scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
+                          nullptr /* BasicBlock */);
+      continue;
+    }
+
+    // If the domain was deleted the assumptions are void.
+    isl_set *Dom = scop->getDomainConditions(AS.BB).release();
+    if (!Dom)
+      continue;
+
+    // If a basic block was given use its domain to simplify the assumption.
+    // In case of restrictions we know they only have to hold on the domain,
+    // thus we can intersect them with the domain of the block. However, for
+    // assumptions the domain has to imply them, thus:
+    //                     _              _____
+    //   Dom => S   <==>   A v B   <==>   A - B
+    //
+    // To avoid the complement we will register A - B as a restriction not an
+    // assumption.
+    isl_set *S = AS.Set.copy();
+    if (AS.Sign == AS_RESTRICTION)
+      S = isl_set_params(isl_set_intersect(S, Dom));
+    else /* (AS.Sign == AS_ASSUMPTION) */
+      S = isl_set_params(isl_set_subtract(Dom, S));
+
+    scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB);
+  }
+  scop->clearRecordedAssumptions();
+}
+
 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
   Value *Val = Inst.getValueOperand();
   Type *ElementType = Val->getType();
@@ -1972,7 +2006,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
   // After the context was fully constructed, thus all our knowledge about
   // the parameters is in there, we add all recorded assumptions to the
   // assumed/invalid context.
-  scop->addRecordedAssumptions();
+  addRecordedAssumptions();
 
   scop->simplifyContexts();
   if (!scop->buildAliasChecks(AA)) {
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 0e5c3c39435cf..2b0d8052aa03e 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -3779,39 +3779,6 @@ void Scop::recordAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
   RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
 }
 
-void Scop::addRecordedAssumptions() {
-  while (!RecordedAssumptions.empty()) {
-    Assumption AS = RecordedAssumptions.pop_back_val();
-
-    if (!AS.BB) {
-      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */);
-      continue;
-    }
-
-    // If the domain was deleted the assumptions are void.
-    isl_set *Dom = getDomainConditions(AS.BB).release();
-    if (!Dom)
-      continue;
-
-    // If a basic block was given use its domain to simplify the assumption.
-    // In case of restrictions we know they only have to hold on the domain,
-    // thus we can intersect them with the domain of the block. However, for
-    // assumptions the domain has to imply them, thus:
-    //                     _              _____
-    //   Dom => S   <==>   A v B   <==>   A - B
-    //
-    // To avoid the complement we will register A - B as a restriction not an
-    // assumption.
-    isl_set *S = AS.Set.copy();
-    if (AS.Sign == AS_RESTRICTION)
-      S = isl_set_params(isl_set_intersect(S, Dom));
-    else /* (AS.Sign == AS_ASSUMPTION) */
-      S = isl_set_params(isl_set_subtract(Dom, S));
-
-    addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB);
-  }
-}
-
 void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
   LLVM_DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind << "\n");
   addAssumption(Kind, isl::set::empty(getParamSpace()), Loc, AS_ASSUMPTION, BB);

From 0e534de4fef8d13116283a841d6d1875222a3ed3 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Tue, 16 Jul 2019 21:05:08 +0000
Subject: [PATCH 286/451] [Symbol] Remove unused fields from ClangASTContext

llvm-svn: 366261
---
 lldb/include/lldb/Symbol/ClangASTContext.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h
index dda9f4347f0cd..d0a834e01f3aa 100644
--- a/lldb/include/lldb/Symbol/ClangASTContext.h
+++ b/lldb/include/lldb/Symbol/ClangASTContext.h
@@ -982,7 +982,6 @@ class ClangASTContext : public TypeSystem {
     std::unique_ptr<clang::ASTContext>              m_ast_up;
     std::unique_ptr<clang::LangOptions>             m_language_options_up;
     std::unique_ptr<clang::FileManager>             m_file_manager_up;
-    std::unique_ptr<clang::FileSystemOptions>       m_file_system_options_up;
     std::unique_ptr<clang::SourceManager>           m_source_manager_up;
     std::unique_ptr<clang::DiagnosticsEngine>       m_diagnostics_engine_up;
     std::unique_ptr<clang::DiagnosticConsumer>      m_diagnostic_consumer_up;
@@ -1001,7 +1000,6 @@ class ClangASTContext : public TypeSystem {
     clang::ExternalASTMerger::OriginMap             m_origins;
     uint32_t                                        m_pointer_byte_size;
     bool                                            m_ast_owned;
-    bool                                            m_can_evaluate_expressions;
     /// The sema associated that is currently used to build this ASTContext.
     /// May be null if we are already done parsing this ASTContext or the
     /// ASTContext wasn't created by parsing source code.

From 588fc9e756d3c9981cf7b17f18bd199e7bcd4172 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <adamski.dominik@gmail.com>
Date: Tue, 16 Jul 2019 21:10:45 +0000
Subject: [PATCH 287/451] [NFC][ScopBuilder] Move buildAliasChecks and its
 implementing methods to ScopBuilder

Scope of changes:
1) Moved buildAliasChecks to ScopBuilder.
2) Moved buildAliasGroup to ScopBuilder.
3) Moved buildAliasGroups to ScopBuilder.
4) Moved buildAliasGroupsForAccesses to ScopBuilder.
5) Moved splitAliasGroupsByDomain to ScopBuilder.
6) Moved addNonEmptyDomainConstraints to ScopBuilder.
7) Moved buildMinMaxAccess to ScopBuilder.
8) Moved calculateMinMaxAccess to ScopBuilder.
9) Moved getAccessDomain to ScopBuilder.
10) Moved command line options used only by buildAliasChecks functions to ScopBuilder.
11) Refactored buildAliasGroup function. Added addAliasGroup function to Scop class for pushing back calculated min/max accesses.
12) Added function incrementNumberOfAliasingAssumptions, which increments the AssumptionsAliasing statistic. The AssumptionsAliasing variable is defined by the STATISTIC macro inside ScopInfo.cpp and is also used by the trackAssumption function of the Scop class.
13) Added reference to OptimizationRemarkEmitter to ScopBuilder class.
14) Moved calculateMinMaxAccess function to ScopBuilder class.

Differential Revision: https://reviews.llvm.org/D63693

llvm-svn: 366262
---
 polly/include/polly/ScopBuilder.h  |  56 ++++-
 polly/include/polly/ScopInfo.h     |  60 +----
 polly/lib/Analysis/ScopBuilder.cpp | 335 +++++++++++++++++++++++++-
 polly/lib/Analysis/ScopInfo.cpp    | 371 ++---------------------------
 4 files changed, 420 insertions(+), 402 deletions(-)

diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h
index 9cfdf7b3cbf44..0cb90e3ca0e6a 100644
--- a/polly/include/polly/ScopBuilder.h
+++ b/polly/include/polly/ScopBuilder.h
@@ -30,6 +30,7 @@ extern bool ModelReadOnlyScalars;
 
 /// Build the Polly IR (Scop and ScopStmt) on a Region.
 class ScopBuilder {
+
   /// The AliasAnalysis to build AliasSetTracker.
   AliasAnalysis &AA;
 
@@ -48,6 +49,9 @@ class ScopBuilder {
   /// The ScalarEvolution to help building Scop.
   ScalarEvolution &SE;
 
+  /// An optimization diagnostic interface to add optimization remarks.
+  OptimizationRemarkEmitter &ORE;
+
   /// Set of instructions that might read any memory location.
   SmallVector<std::pair<ScopStmt *, Instruction *>, 16> GlobalReads;
 
@@ -117,8 +121,7 @@ class ScopBuilder {
   // @}
 
   // Build the SCoP for Region @p R.
-  void buildScop(Region &R, AssumptionCache &AC,
-                 OptimizationRemarkEmitter &ORE);
+  void buildScop(Region &R, AssumptionCache &AC);
 
   /// Create equivalence classes for required invariant accesses.
   ///
@@ -175,6 +178,52 @@ class ScopBuilder {
   /// @param Stmt       The parent statement of the instruction
   void buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt);
 
+  /// Build the alias checks for this SCoP.
+  bool buildAliasChecks();
+
+  /// A vector of memory accesses that belong to an alias group.
+  using AliasGroupTy = SmallVector<MemoryAccess *, 4>;
+
+  /// A vector of alias groups.
+  using AliasGroupVectorTy = SmallVector<AliasGroupTy, 4>;
+
+  /// Build a given alias group and its access data.
+  ///
+  /// @param AliasGroup     The alias group to build.
+  /// @param HasWriteAccess A set of arrays through which memory is not only
+  ///                       read, but also written.
+  //
+  /// @returns True if __no__ error occurred, false otherwise.
+  bool buildAliasGroup(AliasGroupTy &AliasGroup,
+                       DenseSet<const ScopArrayInfo *> HasWriteAccess);
+
+  /// Build all alias groups for this SCoP.
+  ///
+  /// @returns True if __no__ error occurred, false otherwise.
+  bool buildAliasGroups();
+
+  /// Build alias groups for all memory accesses in the Scop.
+  ///
+  /// Using the alias analysis and an alias set tracker we build alias sets
+  /// for all memory accesses inside the Scop. For each alias set we then map
+  /// the aliasing pointers back to the memory accesses we know, thus obtain
+  /// groups of memory accesses which might alias. We also collect the set of
+  /// arrays through which memory is written.
+  ///
+  /// @returns A pair consistent of a vector of alias groups and a set of arrays
+  ///          through which memory is written.
+  std::tuple<AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
+  buildAliasGroupsForAccesses();
+
+  ///  Split alias groups by iteration domains.
+  ///
+  ///  We split each group based on the domains of the minimal/maximal accesses.
+  ///  That means two minimal/maximal accesses are only in a group if their
+  ///  access domains intersect. Otherwise, they are in different groups.
+  ///
+  ///  @param AliasGroups The alias groups to split
+  void splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups);
+
   /// Build an instance of MemoryAccess from the Load/Store instruction.
   ///
   /// @param Inst       The Load/Store instruction that access the memory
@@ -344,6 +393,9 @@ class ScopBuilder {
   /// @see MemoryKind
   void addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI);
 
+  /// Wrapper function to calculate minimal/maximal accesses to each array.
+  bool calculateMinMaxAccess(AliasGroupTy AliasGroup,
+                             Scop::MinMaxVectorTy &MinMaxAccesses);
   /// Build the domain of @p Stmt.
   void buildDomain(ScopStmt &Stmt);
 
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 30e8d0346ad58..c86513a5f62c9 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -2258,6 +2258,12 @@ class Scop {
   Scop &operator=(const Scop &) = delete;
   ~Scop();
 
+  /// Increment actual number of aliasing assumptions taken
+  ///
+  /// @param Step    Number of new aliasing assumptions which should be added to
+  /// the number of already taken assumptions.
+  static void incrementNumberOfAliasingAssumptions(unsigned Step);
+
   /// Get the count of copy statements added to this Scop.
   ///
   /// @return The count of copy statements added to this Scop.
@@ -2589,59 +2595,17 @@ class Scop {
   /// Return true if and only if the InvalidContext is trivial (=empty).
   bool hasTrivialInvalidContext() const { return InvalidContext.is_empty(); }
 
-  /// A vector of memory accesses that belong to an alias group.
-  using AliasGroupTy = SmallVector<MemoryAccess *, 4>;
-
-  /// A vector of alias groups.
-  using AliasGroupVectorTy = SmallVector<Scop::AliasGroupTy, 4>;
-
-  /// Build the alias checks for this SCoP.
-  bool buildAliasChecks(AliasAnalysis &AA);
-
-  /// Build all alias groups for this SCoP.
-  ///
-  /// @returns True if __no__ error occurred, false otherwise.
-  bool buildAliasGroups(AliasAnalysis &AA);
-
-  /// Build alias groups for all memory accesses in the Scop.
-  ///
-  /// Using the alias analysis and an alias set tracker we build alias sets
-  /// for all memory accesses inside the Scop. For each alias set we then map
-  /// the aliasing pointers back to the memory accesses we know, thus obtain
-  /// groups of memory accesses which might alias. We also collect the set of
-  /// arrays through which memory is written.
-  ///
-  /// @param AA A reference to the alias analysis.
-  ///
-  /// @returns A pair consistent of a vector of alias groups and a set of arrays
-  ///          through which memory is written.
-  std::tuple<AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
-  buildAliasGroupsForAccesses(AliasAnalysis &AA);
-
-  ///  Split alias groups by iteration domains.
-  ///
-  ///  We split each group based on the domains of the minimal/maximal accesses.
-  ///  That means two minimal/maximal accesses are only in a group if their
-  ///  access domains intersect. Otherwise, they are in different groups.
-  ///
-  ///  @param AliasGroups The alias groups to split
-  void splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups);
-
-  /// Build a given alias group and its access data.
-  ///
-  /// @param AliasGroup     The alias group to build.
-  /// @param HasWriteAccess A set of arrays through which memory is not only
-  ///                       read, but also written.
-  ///
-  /// @returns True if __no__ error occurred, false otherwise.
-  bool buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
-                       DenseSet<const ScopArrayInfo *> HasWriteAccess);
-
   /// Return all alias groups for this SCoP.
   const MinMaxVectorPairVectorTy &getAliasGroups() const {
     return MinMaxAliasGroups;
   }
 
+  void addAliasGroup(MinMaxVectorTy &MinMaxAccessesReadWrite,
+                     MinMaxVectorTy &MinMaxAccessesReadOnly) {
+    MinMaxAliasGroups.emplace_back();
+    MinMaxAliasGroups.back().first = MinMaxAccessesReadWrite;
+    MinMaxAliasGroups.back().second = MinMaxAccessesReadOnly;
+  }
   /// Get an isl string representing the context.
   std::string getContextStr() const;
 
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index 617bc81908273..f079a749dcb11 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -76,6 +76,13 @@ static cl::opt<bool, true> XModelReadOnlyScalars(
     cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
     cl::init(true), cl::cat(PollyCategory));
 
+static cl::opt<int>
+    OptComputeOut("polly-analysis-computeout",
+                  cl::desc("Bound the scop analysis by a maximal amount of "
+                           "computational steps (0 means no bound)"),
+                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
+                  cl::cat(PollyCategory));
+
 static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
     "polly-allow-dereference-of-all-function-parameters",
     cl::desc(
@@ -86,6 +93,22 @@ static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
         " their loads. "),
     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 
+static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
+    "polly-rtc-max-arrays-per-group",
+    cl::desc("The maximal number of arrays to compare in each alias group."),
+    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
+
+static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
+    "polly-rtc-max-array-disjuncts",
+    cl::desc("The maximal number of disjunts allowed in memory accesses to "
+             "to build RTCs."),
+    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
+
+static cl::opt<unsigned> RunTimeChecksMaxParameters(
+    "polly-rtc-max-parameters",
+    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
+    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
+
 static cl::opt<bool> UnprofitableScalarAccs(
     "polly-unprofitable-scalar-accs",
     cl::desc("Count statements with scalar accesses as not optimizable"),
@@ -1801,6 +1824,309 @@ void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
   }
 }
 
+/// Add the minimal/maximal access in @p Set to @p User.
+///
+/// @return True if more accesses should be added, false if we reached the
+///         maximal number of run-time checks to be generated.
+static bool buildMinMaxAccess(isl::set Set,
+                              Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
+  isl::pw_multi_aff MinPMA, MaxPMA;
+  isl::pw_aff LastDimAff;
+  isl::aff OneAff;
+  unsigned Pos;
+
+  Set = Set.remove_divs();
+  polly::simplify(Set);
+
+  if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts)
+    Set = Set.simple_hull();
+
+  // Restrict the number of parameters involved in the access as the lexmin/
+  // lexmax computation will take too long if this number is high.
+  //
+  // Experiments with a simple test case using an i7 4800MQ:
+  //
+  //  #Parameters involved | Time (in sec)
+  //            6          |     0.01
+  //            7          |     0.04
+  //            8          |     0.12
+  //            9          |     0.40
+  //           10          |     1.54
+  //           11          |     6.78
+  //           12          |    30.38
+  //
+  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
+    unsigned InvolvedParams = 0;
+    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
+      if (Set.involves_dims(isl::dim::param, u, 1))
+        InvolvedParams++;
+
+    if (InvolvedParams > RunTimeChecksMaxParameters)
+      return false;
+  }
+
+  MinPMA = Set.lexmin_pw_multi_aff();
+  MaxPMA = Set.lexmax_pw_multi_aff();
+
+  MinPMA = MinPMA.coalesce();
+  MaxPMA = MaxPMA.coalesce();
+
+  // Adjust the last dimension of the maximal access by one as we want to
+  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
+  // we test during code generation might now point after the end of the
+  // allocated array but we will never dereference it anyway.
+  assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) &&
+         "Assumed at least one output dimension");
+
+  Pos = MaxPMA.dim(isl::dim::out) - 1;
+  LastDimAff = MaxPMA.get_pw_aff(Pos);
+  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
+  OneAff = OneAff.add_constant_si(1);
+  LastDimAff = LastDimAff.add(OneAff);
+  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
+
+  if (!MinPMA || !MaxPMA)
+    return false;
+
+  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
+
+  return true;
+}
+
+/// Wrapper function to calculate minimal/maximal accesses to each array.
+bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
+                                        Scop::MinMaxVectorTy &MinMaxAccesses) {
+  MinMaxAccesses.reserve(AliasGroup.size());
+
+  isl::union_set Domains = scop->getDomains();
+  isl::union_map Accesses = isl::union_map::empty(scop->getParamSpace());
+
+  for (MemoryAccess *MA : AliasGroup)
+    Accesses = Accesses.add_map(MA->getAccessRelation());
+
+  Accesses = Accesses.intersect_domain(Domains);
+  isl::union_set Locations = Accesses.range();
+
+  bool LimitReached = false;
+  for (isl::set Set : Locations.get_set_list()) {
+    LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
+    if (LimitReached)
+      break;
+  }
+
+  return !LimitReached;
+}
+
+static isl::set getAccessDomain(MemoryAccess *MA) {
+  isl::set Domain = MA->getStatement()->getDomain();
+  Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim());
+  return Domain.reset_tuple_id();
+}
+
+bool ScopBuilder::buildAliasChecks() {
+  if (!PollyUseRuntimeAliasChecks)
+    return true;
+
+  if (buildAliasGroups()) {
+    // Aliasing assumptions do not go through addAssumption but we still want to
+    // collect statistics so we do it here explicitly.
+    if (scop->getAliasGroups().size())
+      Scop::incrementNumberOfAliasingAssumptions(1);
+    return true;
+  }
+
+  // If a problem occurs while building the alias groups we need to delete
+  // this SCoP and pretend it wasn't valid in the first place. To this end
+  // we make the assumed context infeasible.
+  scop->invalidate(ALIASING, DebugLoc());
+
+  LLVM_DEBUG(
+      dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
+             << " could not be created as the number of parameters involved "
+                "is too high. The SCoP will be "
+                "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
+                "the maximal number of parameters but be advised that the "
+                "compile time might increase exponentially.\n\n");
+  return false;
+}
+
+std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
+ScopBuilder::buildAliasGroupsForAccesses() {
+  AliasSetTracker AST(AA);
+
+  DenseMap<Value *, MemoryAccess *> PtrToAcc;
+  DenseSet<const ScopArrayInfo *> HasWriteAccess;
+  for (ScopStmt &Stmt : *scop) {
+
+    isl::set StmtDomain = Stmt.getDomain();
+    bool StmtDomainEmpty = StmtDomain.is_empty();
+
+    // Statements with an empty domain will never be executed.
+    if (StmtDomainEmpty)
+      continue;
+
+    for (MemoryAccess *MA : Stmt) {
+      if (MA->isScalarKind())
+        continue;
+      if (!MA->isRead())
+        HasWriteAccess.insert(MA->getScopArrayInfo());
+      MemAccInst Acc(MA->getAccessInstruction());
+      if (MA->isRead() && isa<MemTransferInst>(Acc))
+        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
+      else
+        PtrToAcc[Acc.getPointerOperand()] = MA;
+      AST.add(Acc);
+    }
+  }
+
+  AliasGroupVectorTy AliasGroups;
+  for (AliasSet &AS : AST) {
+    if (AS.isMustAlias() || AS.isForwardingAliasSet())
+      continue;
+    AliasGroupTy AG;
+    for (auto &PR : AS)
+      AG.push_back(PtrToAcc[PR.getValue()]);
+    if (AG.size() < 2)
+      continue;
+    AliasGroups.push_back(std::move(AG));
+  }
+
+  return std::make_tuple(AliasGroups, HasWriteAccess);
+}
+
+bool ScopBuilder::buildAliasGroups() {
+  // To create sound alias checks we perform the following steps:
+  //   o) We partition each group into read only and non read only accesses.
+  //   o) For each group with more than one base pointer we then compute minimal
+  //      and maximal accesses to each array of a group in read only and non
+  //      read only partitions separately.
+  AliasGroupVectorTy AliasGroups;
+  DenseSet<const ScopArrayInfo *> HasWriteAccess;
+
+  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
+
+  splitAliasGroupsByDomain(AliasGroups);
+
+  for (AliasGroupTy &AG : AliasGroups) {
+    if (!scop->hasFeasibleRuntimeContext())
+      return false;
+
+    {
+      IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
+      bool Valid = buildAliasGroup(AG, HasWriteAccess);
+      if (!Valid)
+        return false;
+    }
+    if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
+      scop->invalidate(COMPLEXITY, DebugLoc());
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool ScopBuilder::buildAliasGroup(
+    AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
+  AliasGroupTy ReadOnlyAccesses;
+  AliasGroupTy ReadWriteAccesses;
+  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
+  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
+
+  if (AliasGroup.size() < 2)
+    return true;
+
+  for (MemoryAccess *Access : AliasGroup) {
+    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
+                                        Access->getAccessInstruction())
+             << "Possibly aliasing pointer, use restrict keyword.");
+    const ScopArrayInfo *Array = Access->getScopArrayInfo();
+    if (HasWriteAccess.count(Array)) {
+      ReadWriteArrays.insert(Array);
+      ReadWriteAccesses.push_back(Access);
+    } else {
+      ReadOnlyArrays.insert(Array);
+      ReadOnlyAccesses.push_back(Access);
+    }
+  }
+
+  // If there are no read-only pointers, and less than two read-write pointers,
+  // no alias check is needed.
+  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
+    return true;
+
+  // If there is no read-write pointer, no alias check is needed.
+  if (ReadWriteArrays.empty())
+    return true;
+
+  // For non-affine accesses, no alias check can be generated as we cannot
+  // compute a sufficiently tight lower and upper bound: bail out.
+  for (MemoryAccess *MA : AliasGroup) {
+    if (!MA->isAffine()) {
+      scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
+                       MA->getAccessInstruction()->getParent());
+      return false;
+    }
+  }
+
+  // Ensure that for all memory accesses for which we generate alias checks,
+  // their base pointers are available.
+  for (MemoryAccess *MA : AliasGroup) {
+    if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
+      scop->addRequiredInvariantLoad(
+          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
+  }
+
+  //  scop->getAliasGroups().emplace_back();
+  //  Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
+  Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
+  Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
+
+  bool Valid;
+
+  Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
+
+  if (!Valid)
+    return false;
+
+  // Bail out if the number of values we need to compare is too large.
+  // This is important as the number of comparisons grows quadratically with
+  // the number of values we need to compare.
+  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
+      RunTimeChecksMaxArraysPerGroup)
+    return false;
+
+  Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
+
+  scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
+  if (!Valid)
+    return false;
+
+  return true;
+}
+
+void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
+  for (unsigned u = 0; u < AliasGroups.size(); u++) {
+    AliasGroupTy NewAG;
+    AliasGroupTy &AG = AliasGroups[u];
+    AliasGroupTy::iterator AGI = AG.begin();
+    isl::set AGDomain = getAccessDomain(*AGI);
+    while (AGI != AG.end()) {
+      MemoryAccess *MA = *AGI;
+      isl::set MADomain = getAccessDomain(MA);
+      if (AGDomain.is_disjoint(MADomain)) {
+        NewAG.push_back(MA);
+        AGI = AG.erase(AGI);
+      } else {
+        AGDomain = AGDomain.unite(MADomain);
+        AGI++;
+      }
+    }
+    if (NewAG.size() > 1)
+      AliasGroups.push_back(std::move(NewAG));
+  }
+}
+
 #ifndef NDEBUG
 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
   auto PhysUse = VirtualUse::create(S, Op, &LI, false);
@@ -1879,8 +2205,7 @@ static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
                            : RN->getNodeAs<BasicBlock>();
 }
 
-void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
-                            OptimizationRemarkEmitter &ORE) {
+void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
 
   buildStmts(R);
@@ -2009,7 +2334,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC,
   addRecordedAssumptions();
 
   scop->simplifyContexts();
-  if (!scop->buildAliasChecks(AA)) {
+  if (!buildAliasChecks()) {
     LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
     return;
   }
@@ -2035,7 +2360,7 @@ ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                          const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                          ScopDetection &SD, ScalarEvolution &SE,
                          OptimizationRemarkEmitter &ORE)
-    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) {
+    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
   DebugLoc Beg, End;
   auto P = getBBPairForRegion(R);
   getDebugLocations(P, Beg, End);
@@ -2044,7 +2369,7 @@ ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
   ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
            << Msg);
 
-  buildScop(*R, AC, ORE);
+  buildScop(*R, AC);
 
   LLVM_DEBUG(dbgs() << *scop);
 
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 2b0d8052aa03e..9244796a23c6c 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -117,34 +117,11 @@ int const polly::MaxDisjunctsInDomain = 20;
 // number of disjunct when adding non-convex sets to the context.
 static int const MaxDisjunctsInContext = 4;
 
-static cl::opt<int>
-    OptComputeOut("polly-analysis-computeout",
-                  cl::desc("Bound the scop analysis by a maximal amount of "
-                           "computational steps (0 means no bound)"),
-                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
-                  cl::cat(PollyCategory));
-
 static cl::opt<bool> PollyRemarksMinimal(
     "polly-remarks-minimal",
     cl::desc("Do not emit remarks about assumptions that are known"),
     cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));
 
-static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
-    "polly-rtc-max-array-disjuncts",
-    cl::desc("The maximal number of disjunts allowed in memory accesses to "
-             "to build RTCs."),
-    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
-
-static cl::opt<unsigned> RunTimeChecksMaxParameters(
-    "polly-rtc-max-parameters",
-    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
-    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
-
-static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
-    "polly-rtc-max-arrays-per-group",
-    cl::desc("The maximal number of arrays to compare in each alias group."),
-    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
-
 static cl::opt<std::string> UserContextStr(
     "polly-context", cl::value_desc("isl parameter set"),
     cl::desc("Provide additional constraints on the context parameters"),
@@ -1963,11 +1940,6 @@ isl::id Scop::getIdForParam(const SCEV *Parameter) const {
   return ParameterIds.lookup(Parameter);
 }
 
-isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
-  isl::set DomainContext = getDomains().params();
-  return C.intersect_params(DomainContext);
-}
-
 bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
   return DT.dominates(BB, getEntry());
 }
@@ -2205,105 +2177,6 @@ void Scop::simplifyContexts() {
   InvalidContext = InvalidContext.align_params(getParamSpace());
 }
 
-/// Add the minimal/maximal access in @p Set to @p User.
-///
-/// @return True if more accesses should be added, false if we reached the
-///         maximal number of run-time checks to be generated.
-static bool buildMinMaxAccess(isl::set Set,
-                              Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
-  isl::pw_multi_aff MinPMA, MaxPMA;
-  isl::pw_aff LastDimAff;
-  isl::aff OneAff;
-  unsigned Pos;
-
-  Set = Set.remove_divs();
-  polly::simplify(Set);
-
-  if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts)
-    Set = Set.simple_hull();
-
-  // Restrict the number of parameters involved in the access as the lexmin/
-  // lexmax computation will take too long if this number is high.
-  //
-  // Experiments with a simple test case using an i7 4800MQ:
-  //
-  //  #Parameters involved | Time (in sec)
-  //            6          |     0.01
-  //            7          |     0.04
-  //            8          |     0.12
-  //            9          |     0.40
-  //           10          |     1.54
-  //           11          |     6.78
-  //           12          |    30.38
-  //
-  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
-    unsigned InvolvedParams = 0;
-    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
-      if (Set.involves_dims(isl::dim::param, u, 1))
-        InvolvedParams++;
-
-    if (InvolvedParams > RunTimeChecksMaxParameters)
-      return false;
-  }
-
-  MinPMA = Set.lexmin_pw_multi_aff();
-  MaxPMA = Set.lexmax_pw_multi_aff();
-
-  MinPMA = MinPMA.coalesce();
-  MaxPMA = MaxPMA.coalesce();
-
-  // Adjust the last dimension of the maximal access by one as we want to
-  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
-  // we test during code generation might now point after the end of the
-  // allocated array but we will never dereference it anyway.
-  assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) &&
-         "Assumed at least one output dimension");
-
-  Pos = MaxPMA.dim(isl::dim::out) - 1;
-  LastDimAff = MaxPMA.get_pw_aff(Pos);
-  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
-  OneAff = OneAff.add_constant_si(1);
-  LastDimAff = LastDimAff.add(OneAff);
-  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
-
-  if (!MinPMA || !MaxPMA)
-    return false;
-
-  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
-
-  return true;
-}
-
-static isl::set getAccessDomain(MemoryAccess *MA) {
-  isl::set Domain = MA->getStatement()->getDomain();
-  Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim());
-  return Domain.reset_tuple_id();
-}
-
-/// Wrapper function to calculate minimal/maximal accesses to each array.
-static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
-                                  Scop::MinMaxVectorTy &MinMaxAccesses) {
-  MinMaxAccesses.reserve(AliasGroup.size());
-
-  isl::union_set Domains = S.getDomains();
-  isl::union_map Accesses = isl::union_map::empty(S.getParamSpace());
-
-  for (MemoryAccess *MA : AliasGroup)
-    Accesses = Accesses.add_map(MA->getAccessRelation());
-
-  Accesses = Accesses.intersect_domain(Domains);
-  isl::union_set Locations = Accesses.range();
-
-  bool LimitReached = false;
-  for (isl::set Set : Locations.get_set_list()) {
-    LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, S);
-    if (LimitReached)
-      break;
-  }
-
-  return !LimitReached;
-}
-
 /// Helper to treat non-affine regions and basic blocks the same.
 ///
 ///{
@@ -2960,225 +2833,6 @@ bool Scop::addLoopBoundsToHeaderDomain(
   return true;
 }
 
-MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
-  Value *PointerBase = MA->getOriginalBaseAddr();
-
-  auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
-  if (!PointerBaseInst)
-    return nullptr;
-
-  auto *BasePtrStmt = getStmtFor(PointerBaseInst);
-  if (!BasePtrStmt)
-    return nullptr;
-
-  return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
-}
-
-bool Scop::buildAliasChecks(AliasAnalysis &AA) {
-  if (!PollyUseRuntimeAliasChecks)
-    return true;
-
-  if (buildAliasGroups(AA)) {
-    // Aliasing assumptions do not go through addAssumption but we still want to
-    // collect statistics so we do it here explicitly.
-    if (MinMaxAliasGroups.size())
-      AssumptionsAliasing++;
-    return true;
-  }
-
-  // If a problem occurs while building the alias groups we need to delete
-  // this SCoP and pretend it wasn't valid in the first place. To this end
-  // we make the assumed context infeasible.
-  invalidate(ALIASING, DebugLoc());
-
-  LLVM_DEBUG(
-      dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
-             << " could not be created as the number of parameters involved "
-                "is too high. The SCoP will be "
-                "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
-                "the maximal number of parameters but be advised that the "
-                "compile time might increase exponentially.\n\n");
-  return false;
-}
-
-std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
-Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
-  AliasSetTracker AST(AA);
-
-  DenseMap<Value *, MemoryAccess *> PtrToAcc;
-  DenseSet<const ScopArrayInfo *> HasWriteAccess;
-  for (ScopStmt &Stmt : *this) {
-
-    isl::set StmtDomain = Stmt.getDomain();
-    bool StmtDomainEmpty = StmtDomain.is_empty();
-
-    // Statements with an empty domain will never be executed.
-    if (StmtDomainEmpty)
-      continue;
-
-    for (MemoryAccess *MA : Stmt) {
-      if (MA->isScalarKind())
-        continue;
-      if (!MA->isRead())
-        HasWriteAccess.insert(MA->getScopArrayInfo());
-      MemAccInst Acc(MA->getAccessInstruction());
-      if (MA->isRead() && isa<MemTransferInst>(Acc))
-        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
-      else
-        PtrToAcc[Acc.getPointerOperand()] = MA;
-      AST.add(Acc);
-    }
-  }
-
-  AliasGroupVectorTy AliasGroups;
-  for (AliasSet &AS : AST) {
-    if (AS.isMustAlias() || AS.isForwardingAliasSet())
-      continue;
-    AliasGroupTy AG;
-    for (auto &PR : AS)
-      AG.push_back(PtrToAcc[PR.getValue()]);
-    if (AG.size() < 2)
-      continue;
-    AliasGroups.push_back(std::move(AG));
-  }
-
-  return std::make_tuple(AliasGroups, HasWriteAccess);
-}
-
-void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
-  for (unsigned u = 0; u < AliasGroups.size(); u++) {
-    AliasGroupTy NewAG;
-    AliasGroupTy &AG = AliasGroups[u];
-    AliasGroupTy::iterator AGI = AG.begin();
-    isl::set AGDomain = getAccessDomain(*AGI);
-    while (AGI != AG.end()) {
-      MemoryAccess *MA = *AGI;
-      isl::set MADomain = getAccessDomain(MA);
-      if (AGDomain.is_disjoint(MADomain)) {
-        NewAG.push_back(MA);
-        AGI = AG.erase(AGI);
-      } else {
-        AGDomain = AGDomain.unite(MADomain);
-        AGI++;
-      }
-    }
-    if (NewAG.size() > 1)
-      AliasGroups.push_back(std::move(NewAG));
-  }
-}
-
-bool Scop::buildAliasGroups(AliasAnalysis &AA) {
-  // To create sound alias checks we perform the following steps:
-  //   o) We partition each group into read only and non read only accesses.
-  //   o) For each group with more than one base pointer we then compute minimal
-  //      and maximal accesses to each array of a group in read only and non
-  //      read only partitions separately.
-  AliasGroupVectorTy AliasGroups;
-  DenseSet<const ScopArrayInfo *> HasWriteAccess;
-
-  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);
-
-  splitAliasGroupsByDomain(AliasGroups);
-
-  for (AliasGroupTy &AG : AliasGroups) {
-    if (!hasFeasibleRuntimeContext())
-      return false;
-
-    {
-      IslMaxOperationsGuard MaxOpGuard(getIslCtx().get(), OptComputeOut);
-      bool Valid = buildAliasGroup(AG, HasWriteAccess);
-      if (!Valid)
-        return false;
-    }
-    if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota) {
-      invalidate(COMPLEXITY, DebugLoc());
-      return false;
-    }
-  }
-
-  return true;
-}
-
-bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
-                           DenseSet<const ScopArrayInfo *> HasWriteAccess) {
-  AliasGroupTy ReadOnlyAccesses;
-  AliasGroupTy ReadWriteAccesses;
-  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
-  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
-
-  if (AliasGroup.size() < 2)
-    return true;
-
-  for (MemoryAccess *Access : AliasGroup) {
-    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
-                                        Access->getAccessInstruction())
-             << "Possibly aliasing pointer, use restrict keyword.");
-    const ScopArrayInfo *Array = Access->getScopArrayInfo();
-    if (HasWriteAccess.count(Array)) {
-      ReadWriteArrays.insert(Array);
-      ReadWriteAccesses.push_back(Access);
-    } else {
-      ReadOnlyArrays.insert(Array);
-      ReadOnlyAccesses.push_back(Access);
-    }
-  }
-
-  // If there are no read-only pointers, and less than two read-write pointers,
-  // no alias check is needed.
-  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
-    return true;
-
-  // If there is no read-write pointer, no alias check is needed.
-  if (ReadWriteArrays.empty())
-    return true;
-
-  // For non-affine accesses, no alias check can be generated as we cannot
-  // compute a sufficiently tight lower and upper bound: bail out.
-  for (MemoryAccess *MA : AliasGroup) {
-    if (!MA->isAffine()) {
-      invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
-                 MA->getAccessInstruction()->getParent());
-      return false;
-    }
-  }
-
-  // Ensure that for all memory accesses for which we generate alias checks,
-  // their base pointers are available.
-  for (MemoryAccess *MA : AliasGroup) {
-    if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
-      addRequiredInvariantLoad(
-          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
-  }
-
-  MinMaxAliasGroups.emplace_back();
-  MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
-  MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
-  MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;
-
-  bool Valid;
-
-  Valid =
-      calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);
-
-  if (!Valid)
-    return false;
-
-  // Bail out if the number of values we need to compare is too large.
-  // This is important as the number of comparisons grows quadratically with
-  // the number of values we need to compare.
-  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
-      RunTimeChecksMaxArraysPerGroup)
-    return false;
-
-  Valid =
-      calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);
-
-  if (!Valid)
-    return false;
-
-  return true;
-}
-
 /// Get the smallest loop that contains @p S but is not in @p S.
 static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
   // Start with the smallest loop containing the entry and expand that
@@ -3647,11 +3301,30 @@ bool Scop::hasFeasibleRuntimeContext() const {
 
   auto DomainContext = getDomains().params();
   IsFeasible = !DomainContext.is_subset(NegativeContext);
-  IsFeasible &= !Context.is_subset(NegativeContext);
+  IsFeasible &= !getContext().is_subset(NegativeContext);
 
   return IsFeasible;
 }
 
+isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
+  isl::set DomainContext = getDomains().params();
+  return C.intersect_params(DomainContext);
+}
+
+MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
+  Value *PointerBase = MA->getOriginalBaseAddr();
+
+  auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
+  if (!PointerBaseInst)
+    return nullptr;
+
+  auto *BasePtrStmt = getStmtFor(PointerBaseInst);
+  if (!BasePtrStmt)
+    return nullptr;
+
+  return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
+}
+
 static std::string toString(AssumptionKind Kind) {
   switch (Kind) {
   case ALIASING:
@@ -4380,6 +4053,10 @@ bool Scop::isEscaping(Instruction *Inst) {
   return false;
 }
 
+void Scop::incrementNumberOfAliasingAssumptions(unsigned step) {
+  AssumptionsAliasing += step;
+}
+
 Scop::ScopStatistics Scop::getStatistics() const {
   ScopStatistics Result;
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)

From e559f62506423d5df23355862e11d233ff3c5242 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Tue, 16 Jul 2019 21:13:40 +0000
Subject: [PATCH 288/451] [libcxx] Rejigger test for destroying delete
 feature-test macros

In r361572, we introduced library support for C++20 destroying delete
and decided to only define the library feature-test macro when the
compiler supports the underlying language feature. This patch reworks
the tests to mirror that.

llvm-svn: 366263
---
 .../destroying_delete_t.pass.cpp                | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
index 834222c06bfc0..fa4e77c71f759 100644
--- a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp
@@ -52,10 +52,19 @@ void A::operator delete(A* a, std::destroying_delete_t) {
   ::operator delete(a);
 }
 
-#ifndef __cpp_lib_destroying_delete
-#error "Expected __cpp_lib_destroying_delete to be defined"
-#elif __cpp_lib_destroying_delete < 201806L
-#error "Unexpected value of __cpp_lib_destroying_delete"
+// Only test the definition of the library feature-test macro when the compiler
+// supports the feature -- otherwise we don't define the library feature-test
+// macro.
+#if defined(__cpp_impl_destroying_delete)
+#  if !defined(__cpp_lib_destroying_delete)
+#    error "Expected __cpp_lib_destroying_delete to be defined"
+#  elif __cpp_lib_destroying_delete < 201806L
+#    error "Unexpected value of __cpp_lib_destroying_delete"
+#  endif
+#else
+#  if defined(__cpp_lib_destroying_delete)
+#    error "The library feature-test macro for destroying delete shouldn't be defined when the compiler doesn't support the language feature"
+#  endif
 #endif
 
 int main() {

From 40580d36c4de7dfbff897ab72cc4e535c33d09c5 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Tue, 16 Jul 2019 21:15:19 +0000
Subject: [PATCH 289/451] DWARF: Skip zero column for inline call sites

D64033 <https://reviews.llvm.org/D64033> added DW_AT_call_column for
inline sites. However, that change wasn't aware of "-gno-column-info".
To avoid adding column info when "-gno-column-info" is used, now
DW_AT_call_column is only added when we have a non-zero column (when
"-gno-column-info" is used, the column will be zero).

Patch by Wenlei He!

Differential Revision: https://reviews.llvm.org/D64784

llvm-svn: 366264
---
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 3 ++-
 llvm/test/DebugInfo/X86/fission-inline.ll        | 6 +++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 8862fa17e5b60..9548ad9918c1f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -543,7 +543,8 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
   addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
           getOrCreateSourceID(IA->getFile()));
   addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
-  addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn());
+  if (IA->getColumn())
+    addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn());
   if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
     addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
             IA->getDiscriminator());
diff --git a/llvm/test/DebugInfo/X86/fission-inline.ll b/llvm/test/DebugInfo/X86/fission-inline.ll
index 0702465e60e3f..0fb4b83bdf933 100644
--- a/llvm/test/DebugInfo/X86/fission-inline.ll
+++ b/llvm/test/DebugInfo/X86/fission-inline.ll
@@ -71,6 +71,8 @@
 ; CHECK:     DW_AT_call_file
 ; CHECK-NEXT:     DW_AT_call_line {{.*}} (18)
 ; CHECK-NEXT:     DW_AT_call_column {{.*}} (0x05)
+; CHECK:     DW_AT_call_file
+; CHECK-NEXT:     DW_AT_call_line {{.*}} (21)
 ; CHECK-NOT: DW_
 ; CHECK: .debug_info.dwo contents:
 
@@ -82,6 +84,7 @@ entry:
   call void @_Z2f1v(), !dbg !26
   call void @_Z2f1v(), !dbg !25
   call void @_Z2f1v(), !dbg !28
+  call void @_Z2f1v(), !dbg !29
   ret void, !dbg !29
 }
 
@@ -122,4 +125,5 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !26 = !DILocation(line: 11, column: 3, scope: !11, inlinedAt: !27)
 !27 = !DILocation(line: 18, column: 5, scope: !20)
 !28 = !DILocation(line: 12, column: 3, scope: !11, inlinedAt: !27)
-!29 = !DILocation(line: 21, column: 1, scope: !10)
+!29 = !DILocation(line: 12, column: 3, scope: !11, inlinedAt: !30)
+!30 = !DILocation(line: 21, column: 0, scope: !10)

From ffca322266fcd431f7f17ae42ef3f665a3157d39 Mon Sep 17 00:00:00 2001
From: Matthias Gehre <M.Gehre@gmx.de>
Date: Tue, 16 Jul 2019 21:19:00 +0000
Subject: [PATCH 290/451] [clang-tidy] initial version of
 readability-convert-member-functions-to-static

Summary:
Finds non-static member functions that can be made ``static``.

I have run this check (repeatedly) over llvm-project. It made 1708 member functions
``static``. Out of those, I had to exclude 22 via ``NOLINT`` because their address
was taken and stored in a variable of pointer-to-member type (e.g. passed to
llvm::StringSwitch).
It also made 243 member functions ``const``. (This is currently very conservative
to have no false-positives and can hopefully be extended in the future.)

You can find the results here: https://github.com/mgehre/llvm-project/commits/static_const_eval

Reviewers: alexfh, aaron.ballman

Subscribers: mgorny, xazax.hun, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61749

llvm-svn: 366265
---
 .../clang-tidy/readability/CMakeLists.txt     |   1 +
 .../ConvertMemberFunctionsToStatic.cpp        | 172 ++++++++++++++
 .../ConvertMemberFunctionsToStatic.h          |  37 +++
 .../readability/ReadabilityTidyModule.cpp     |   3 +
 clang-tools-extra/docs/ReleaseNotes.rst       |   5 +
 .../docs/clang-tidy/checks/list.rst           |   1 +
 ...ity-convert-member-functions-to-static.rst |  14 ++
 ...ity-convert-member-functions-to-static.cpp | 218 ++++++++++++++++++
 8 files changed, 451 insertions(+)
 create mode 100644 clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp
 create mode 100644 clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h
 create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst
 create mode 100644 clang-tools-extra/test/clang-tidy/readability-convert-member-functions-to-static.cpp

diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
index b48e307e6153f..2d226b10334af 100644
--- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
@@ -5,6 +5,7 @@ add_clang_library(clangTidyReadabilityModule
   BracesAroundStatementsCheck.cpp
   ConstReturnTypeCheck.cpp
   ContainerSizeEmptyCheck.cpp
+  ConvertMemberFunctionsToStatic.cpp
   DeleteNullPointerCheck.cpp
   DeletedDefaultCheck.cpp
   ElseAfterReturnCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp
new file mode 100644
index 0000000000000..f0e5b29dcab60
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp
@@ -0,0 +1,172 @@
+//===--- ConvertMemberFunctionsToStatic.cpp - clang-tidy ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ConvertMemberFunctionsToStatic.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace readability {
+
+AST_MATCHER(CXXMethodDecl, isStatic) { return Node.isStatic(); }
+
+AST_MATCHER(CXXMethodDecl, hasTrivialBody) { return Node.hasTrivialBody(); }
+
+AST_MATCHER(CXXMethodDecl, isOverloadedOperator) {
+  return Node.isOverloadedOperator();
+}
+
+AST_MATCHER(CXXRecordDecl, hasAnyDependentBases) {
+  return Node.hasAnyDependentBases();
+}
+
+AST_MATCHER(CXXMethodDecl, isTemplate) {
+  return Node.getTemplatedKind() != FunctionDecl::TK_NonTemplate;
+}
+
+AST_MATCHER(CXXMethodDecl, isDependentContext) {
+  return Node.isDependentContext();
+}
+
+AST_MATCHER(CXXMethodDecl, isInsideMacroDefinition) {
+  const ASTContext &Ctxt = Finder->getASTContext();
+  return clang::Lexer::makeFileCharRange(
+             clang::CharSourceRange::getCharRange(
+                 Node.getTypeSourceInfo()->getTypeLoc().getSourceRange()),
+             Ctxt.getSourceManager(), Ctxt.getLangOpts())
+      .isInvalid();
+}
+
+AST_MATCHER_P(CXXMethodDecl, hasCanonicalDecl,
+              ast_matchers::internal::Matcher<CXXMethodDecl>, InnerMatcher) {
+  return InnerMatcher.matches(*Node.getCanonicalDecl(), Finder, Builder);
+}
+
+AST_MATCHER(CXXMethodDecl, usesThis) {
+  class FindUsageOfThis : public RecursiveASTVisitor<FindUsageOfThis> {
+  public:
+    bool Used = false;
+
+    bool VisitCXXThisExpr(const CXXThisExpr *E) {
+      Used = true;
+      return false; // Stop traversal.
+    }
+  } UsageOfThis;
+
+  // TraverseStmt does not modify its argument.
+  UsageOfThis.TraverseStmt(const_cast<Stmt *>(Node.getBody()));
+
+  return UsageOfThis.Used;
+}
+
+void ConvertMemberFunctionsToStatic::registerMatchers(MatchFinder *Finder) {
+  Finder->addMatcher(
+      cxxMethodDecl(
+          isDefinition(), isUserProvided(),
+          unless(anyOf(
+              isExpansionInSystemHeader(), isVirtual(), isStatic(),
+              hasTrivialBody(), isOverloadedOperator(), cxxConstructorDecl(),
+              cxxDestructorDecl(), cxxConversionDecl(), isTemplate(),
+              isDependentContext(),
+              ofClass(anyOf(
+                  isLambda(),
+                  hasAnyDependentBases()) // Method might become virtual
+                                          // depending on template base class.
+                      ),
+              isInsideMacroDefinition(),
+              hasCanonicalDecl(isInsideMacroDefinition()), usesThis())))
+          .bind("x"),
+      this);
+}
+
+/// \brief Obtain the original source code text from a SourceRange.
+static StringRef getStringFromRange(SourceManager &SourceMgr,
+                                    const LangOptions &LangOpts,
+                                    SourceRange Range) {
+  if (SourceMgr.getFileID(Range.getBegin()) !=
+      SourceMgr.getFileID(Range.getEnd()))
+    return {};
+
+  return Lexer::getSourceText(CharSourceRange(Range, true), SourceMgr,
+                              LangOpts);
+}
+
+static SourceRange getLocationOfConst(const TypeSourceInfo *TSI,
+                                      SourceManager &SourceMgr,
+                                      const LangOptions &LangOpts) {
+  assert(TSI);
+  const auto FTL = TSI->getTypeLoc().IgnoreParens().getAs<FunctionTypeLoc>();
+  assert(FTL);
+
+  SourceRange Range{FTL.getRParenLoc().getLocWithOffset(1),
+                    FTL.getLocalRangeEnd()};
+  // Inside Range, there might be other keywords and trailing return types.
+  // Find the exact position of "const".
+  StringRef Text = getStringFromRange(SourceMgr, LangOpts, Range);
+  size_t Offset = Text.find("const");
+  if (Offset == StringRef::npos)
+    return {};
+
+  SourceLocation Start = Range.getBegin().getLocWithOffset(Offset);
+  return {Start, Start.getLocWithOffset(strlen("const") - 1)};
+}
+
+void ConvertMemberFunctionsToStatic::check(
+    const MatchFinder::MatchResult &Result) {
+  const auto *Definition = Result.Nodes.getNodeAs<CXXMethodDecl>("x");
+
+  // TODO: For out-of-line declarations, don't modify the source if the header
+  // is excluded by the -header-filter option.
+  DiagnosticBuilder Diag =
+      diag(Definition->getLocation(), "method %0 can be made static")
+      << Definition;
+
+  // TODO: Would need to remove those in a fix-it.
+  if (Definition->getMethodQualifiers().hasVolatile() ||
+      Definition->getMethodQualifiers().hasRestrict() ||
+      Definition->getRefQualifier() != RQ_None)
+    return;
+
+  const CXXMethodDecl *Declaration = Definition->getCanonicalDecl();
+
+  if (Definition->isConst()) {
+    // Make sure that we either remove 'const' on both declaration and
+    // definition or emit no fix-it at all.
+    SourceRange DefConst = getLocationOfConst(Definition->getTypeSourceInfo(),
+                                              *Result.SourceManager,
+                                              Result.Context->getLangOpts());
+
+    if (DefConst.isInvalid())
+      return;
+
+    if (Declaration != Definition) {
+      SourceRange DeclConst = getLocationOfConst(
+          Declaration->getTypeSourceInfo(), *Result.SourceManager,
+          Result.Context->getLangOpts());
+
+      if (DeclConst.isInvalid())
+        return;
+      Diag << FixItHint::CreateRemoval(DeclConst);
+    }
+
+    // Remove existing 'const' from both declaration and definition.
+    Diag << FixItHint::CreateRemoval(DefConst);
+  }
+  Diag << FixItHint::CreateInsertion(Declaration->getBeginLoc(), "static ");
+}
+
+} // namespace readability
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h
new file mode 100644
index 0000000000000..d9947650ab539
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h
@@ -0,0 +1,37 @@
+//===--- ConvertMemberFunctionsToStatic.h - clang-tidy ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H
+
+#include "../ClangTidy.h"
+
+namespace clang {
+namespace tidy {
+namespace readability {
+
+/// This check finds C++ class methods that can be made static
+/// because they don't use the 'this' pointer.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/
+/// readability-convert-member-functions-to-static.html
+class ConvertMemberFunctionsToStatic : public ClangTidyCheck {
+public:
+  ConvertMemberFunctionsToStatic(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+};
+
+} // namespace readability
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H
diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
index 5b2aed421bf11..5005ba3df61fd 100644
--- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
@@ -13,6 +13,7 @@
 #include "BracesAroundStatementsCheck.h"
 #include "ConstReturnTypeCheck.h"
 #include "ContainerSizeEmptyCheck.h"
+#include "ConvertMemberFunctionsToStatic.h"
 #include "DeleteNullPointerCheck.h"
 #include "DeletedDefaultCheck.h"
 #include "ElseAfterReturnCheck.h"
@@ -57,6 +58,8 @@ class ReadabilityModule : public ClangTidyModule {
         "readability-const-return-type");
     CheckFactories.registerCheck<ContainerSizeEmptyCheck>(
         "readability-container-size-empty");
+    CheckFactories.registerCheck<ConvertMemberFunctionsToStatic>(
+        "readability-convert-member-functions-to-static");
     CheckFactories.registerCheck<DeleteNullPointerCheck>(
         "readability-delete-null-pointer");
     CheckFactories.registerCheck<DeletedDefaultCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index edc158499dcbd..be79ce7dc479a 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -230,6 +230,11 @@ Improvements to clang-tidy
   If set to true, the check will provide fix-its with literal initializers
   (``int i = 0;``) instead of curly braces (``int i{};``).
 
+- New :doc:`readability-convert-member-functions-to-static
+  <clang-tidy/checks/readability-convert-member-functions-to-static>` check.
+
+  Finds non-static member functions that can be made ``static``.
+
 Improvements to include-fixer
 -----------------------------
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index f0349b9a90e9e..27521c86e9676 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -257,6 +257,7 @@ Clang-Tidy Checks
    readability-braces-around-statements
    readability-const-return-type
    readability-container-size-empty
+   readability-convert-member-functions-to-static
    readability-delete-null-pointer
    readability-deleted-default
    readability-else-after-return
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst
new file mode 100644
index 0000000000000..891f6be637142
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst
@@ -0,0 +1,14 @@
+.. title:: clang-tidy - readability-convert-member-functions-to-static
+
+readability-convert-member-functions-to-static
+==============================================
+
+Finds non-static member functions that can be made ``static``
+because the functions don't use ``this``.
+
+After applying modifications as suggested by the check, running the check again
+might find more opportunities to mark member functions ``static``.
+
+After making a member function ``static``, you might want to run the check
+`readability-static-accessed-through-instance` to replace calls like
+``Instance.method()`` by ``Class::method()``.
diff --git a/clang-tools-extra/test/clang-tidy/readability-convert-member-functions-to-static.cpp b/clang-tools-extra/test/clang-tidy/readability-convert-member-functions-to-static.cpp
new file mode 100644
index 0000000000000..9612fa9de8c20
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/readability-convert-member-functions-to-static.cpp
@@ -0,0 +1,218 @@
+// RUN: %check_clang_tidy %s readability-convert-member-functions-to-static %t
+
+class DoNotMakeEmptyStatic {
+  void emptyMethod() {}
+  void empty_method_out_of_line();
+};
+
+void DoNotMakeEmptyStatic::empty_method_out_of_line() {}
+
+class A {
+  int field;
+  const int const_field;
+  static int static_field;
+
+  void no_use() {
+    // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'no_use' can be made static
+    // CHECK-FIXES: {{^}}  static void no_use() {
+    int i = 1;
+  }
+
+  int read_field() {
+    return field;
+  }
+
+  void write_field() {
+    field = 1;
+  }
+
+  int call_non_const_member() { return read_field(); }
+
+  int call_static_member() {
+    // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'call_static_member' can be made static
+    // CHECK-FIXES: {{^}}  static int call_static_member() {
+    already_static();
+  }
+
+  int read_static() {
+    // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'read_static' can be made static
+    // CHECK-FIXES: {{^}}  static int read_static() {
+    return static_field;
+  }
+  void write_static() {
+    // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'write_static' can be made static
+    // CHECK-FIXES: {{^}}  static void write_static() {
+    static_field = 1;
+  }
+
+  static int already_static() { return static_field; }
+
+  int already_const() const { return field; }
+
+  int already_const_convert_to_static() const {
+    // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'already_const_convert_to_static' can be made static
+    // CHECK-FIXES: {{^}}  static int already_const_convert_to_static() {
+    return static_field;
+  }
+
+  static int out_of_line_already_static();
+
+  void out_of_line_call_static();
+  // CHECK-FIXES: {{^}}  static void out_of_line_call_static();
+  int out_of_line_const_to_static() const;
+  // CHECK-FIXES: {{^}}  static int out_of_line_const_to_static() ;
+};
+
+int A::out_of_line_already_static() { return 0; }
+
+void A::out_of_line_call_static() {
+  // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: method 'out_of_line_call_static' can be made static
+  // CHECK-FIXES: {{^}}void A::out_of_line_call_static() {
+  already_static();
+}
+
+int A::out_of_line_const_to_static() const {
+  // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'out_of_line_const_to_static' can be made static
+  // CHECK-FIXES: {{^}}int A::out_of_line_const_to_static() {
+  return 0;
+}
+
+struct KeepVirtual {
+  virtual int f() { return 0; }
+  virtual int h() const { return 0; }
+};
+
+struct KeepVirtualDerived : public KeepVirtual {
+  int f() { return 0; }
+  int h() const override { return 0; }
+};
+
+// Don't add 'static' to special member functions and operators.
+struct KeepSpecial {
+  KeepSpecial() { int L = 0; }
+  ~KeepSpecial() { int L = 0; }
+  int operator+() { return 0; }
+  operator int() { return 0; }
+};
+
+void KeepLambdas() {
+  using FT = int (*)();
+  auto F = static_cast<FT>([]() { return 0; });
+  auto F2 = []() { return 0; };
+}
+
+template <class Base>
+struct KeepWithTemplateBase : public Base {
+  int i;
+  // We cannot make these methods static because they might need to override
+  // a function from Base.
+  int static_f() { return 0; }
+};
+
+template <class T>
+struct KeepTemplateClass {
+  int i;
+  // We cannot make these methods static because a specialization
+  // might use *this differently.
+  int static_f() { return 0; }
+};
+
+struct KeepTemplateMethod {
+  int i;
+  // We cannot make these methods static because a specialization
+  // might use *this differently.
+  template <class T>
+  static int static_f() { return 0; }
+};
+
+void instantiate() {
+  struct S {};
+  KeepWithTemplateBase<S> I1;
+  I1.static_f();
+
+  KeepTemplateClass<int> I2;
+  I2.static_f();
+
+  KeepTemplateMethod I3;
+  I3.static_f<int>();
+}
+
+struct Trailing {
+  auto g() const -> int {
+    // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'g' can be made static
+    // CHECK-FIXES: {{^}}  static auto g() -> int {
+    return 0;
+  }
+
+  void vol() volatile {
+    // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'vol' can be made static
+    return;
+  }
+
+  void ref() const & {
+    // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'ref' can be made static
+    return;
+  }
+  void refref() const && {
+    // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'refref' can be made static
+    return;
+  }
+
+  void restr() __restrict {
+    // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'restr' can be made static
+    return;
+  }
+};
+
+struct UnevaluatedContext {
+  void f() { sizeof(this); }
+
+  void noex() noexcept(noexcept(this));
+};
+
+struct LambdaCapturesThis {
+  int Field;
+
+  int explicitCapture() {
+    return [this]() { return Field; }();
+  }
+
+  int implicitCapture() {
+    return [&]() { return Field; }();
+  }
+};
+
+struct NoFixitInMacro {
+#define CONST const
+  int no_use_macro_const() CONST {
+    // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'no_use_macro_const' can be made static
+    return 0;
+  }
+
+#define ADD_CONST(F) F const
+  int ADD_CONST(no_use_macro2()) {
+    return 0;
+  }
+
+#define FUN no_use_macro()
+  int i;
+  int FUN {
+    return i;
+  }
+
+#define T(FunctionName, Keyword) \
+  Keyword int FunctionName() { return 0; }
+#define EMPTY
+  T(A, EMPTY)
+  T(B, static)
+
+#define T2(FunctionName) \
+  int FunctionName() { return 0; }
+  T2(A2)
+
+#define VOLATILE volatile
+  void volatileMacro() VOLATILE {
+    // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'volatileMacro' can be made static
+    return;
+  }
+};

From fdc61bce9470ca7e80125b54d6dfbf24a3cc68c5 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <adamski.dominik@gmail.com>
Date: Tue, 16 Jul 2019 21:29:06 +0000
Subject: [PATCH 291/451] [NFC][ScopBuilder] Move addUserContext to ScopBuilder

Scope of changes:
1) Moved addUserContext to ScopBuilder.
2) Moved command line option UserContextStr to ScopBuilder.

Differential Revision: https://reviews.llvm.org/D63740

llvm-svn: 366266
---
 polly/include/polly/ScopBuilder.h  |  3 ++
 polly/include/polly/ScopInfo.h     |  3 --
 polly/lib/Analysis/ScopBuilder.cpp | 46 +++++++++++++++++++++++++++++-
 polly/lib/Analysis/ScopInfo.cpp    | 43 ----------------------------
 4 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h
index 0cb90e3ca0e6a..c5cbcf8a8ab42 100644
--- a/polly/include/polly/ScopBuilder.h
+++ b/polly/include/polly/ScopBuilder.h
@@ -376,6 +376,9 @@ class ScopBuilder {
                       BasicBlock *IncomingBlock, Value *IncomingValue,
                       bool IsExitBlock);
 
+  /// Add user provided parameter constraints to context (command line).
+  void addUserContext();
+
   /// Add all recorded assumptions to the assumed context.
   void addRecordedAssumptions();
 
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index c86513a5f62c9..e76442aadcefd 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -2044,9 +2044,6 @@ class Scop {
   void addUserAssumptions(AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
                           DenseMap<BasicBlock *, isl::set> &InvalidDomainMap);
 
-  /// Add user provided parameter constraints to context (command line).
-  void addUserContext();
-
   /// Add the bounds of the parameters to the context.
   void addParameterBounds();
 
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index f079a749dcb11..ec078be083850 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -114,6 +114,11 @@ static cl::opt<bool> UnprofitableScalarAccs(
     cl::desc("Count statements with scalar accesses as not optimizable"),
     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
 
+static cl::opt<std::string> UserContextStr(
+    "polly-context", cl::value_desc("isl parameter set"),
+    cl::desc("Provide additional constraints on the context parameters"),
+    cl::init(""), cl::cat(PollyCategory));
+
 static cl::opt<bool> DetectFortranArrays(
     "polly-detect-fortran-arrays",
     cl::desc("Detect Fortran arrays and use this for code generation"),
@@ -1454,6 +1459,45 @@ bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
   return false;
 }
 
+void ScopBuilder::addUserContext() {
+  if (UserContextStr.empty())
+    return;
+
+  isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
+  isl::space Space = scop->getParamSpace();
+  if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) {
+    std::string SpaceStr = Space.to_str();
+    errs() << "Error: the context provided in -polly-context has not the same "
+           << "number of dimensions than the computed context. Due to this "
+           << "mismatch, the -polly-context option is ignored. Please provide "
+           << "the context in the parameter space: " << SpaceStr << ".\n";
+    return;
+  }
+
+  for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) {
+    std::string NameContext =
+        scop->getContext().get_dim_name(isl::dim::param, i);
+    std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
+
+    if (NameContext != NameUserContext) {
+      std::string SpaceStr = Space.to_str();
+      errs() << "Error: the name of dimension " << i
+             << " provided in -polly-context "
+             << "is '" << NameUserContext << "', but the name in the computed "
+             << "context is '" << NameContext
+             << "'. Due to this name mismatch, "
+             << "the -polly-context option is ignored. Please provide "
+             << "the context in the parameter space: " << SpaceStr << ".\n";
+      return;
+    }
+
+    UserContext = UserContext.set_dim_id(isl::dim::param, i,
+                                         Space.get_dim_id(isl::dim::param, i));
+  }
+  isl::set newContext = scop->getContext().intersect(UserContext);
+  scop->setContext(newContext);
+}
+
 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
                                          isl::union_map Writes) {
   // TODO: Loads that are not loop carried, hence are in a statement with
@@ -2326,7 +2370,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
   scop->finalizeAccesses();
 
   scop->realignParams();
-  scop->addUserContext();
+  addUserContext();
 
   // After the context was fully constructed, thus all our knowledge about
   // the parameters is in there, we add all recorded assumptions to the
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 9244796a23c6c..9ccd9bf1680fd 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -122,11 +122,6 @@ static cl::opt<bool> PollyRemarksMinimal(
     cl::desc("Do not emit remarks about assumptions that are known"),
     cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));
 
-static cl::opt<std::string> UserContextStr(
-    "polly-context", cl::value_desc("isl parameter set"),
-    cl::desc("Provide additional constraints on the context parameters"),
-    cl::init(""), cl::cat(PollyCategory));
-
 static cl::opt<bool>
     IslOnErrorAbort("polly-on-isl-error-abort",
                     cl::desc("Abort if an isl error is encountered"),
@@ -2017,44 +2012,6 @@ void Scop::addUserAssumptions(
   }
 }
 
-void Scop::addUserContext() {
-  if (UserContextStr.empty())
-    return;
-
-  isl::set UserContext = isl::set(getIslCtx(), UserContextStr.c_str());
-  isl::space Space = getParamSpace();
-  if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) {
-    std::string SpaceStr = Space.to_str();
-    errs() << "Error: the context provided in -polly-context has not the same "
-           << "number of dimensions than the computed context. Due to this "
-           << "mismatch, the -polly-context option is ignored. Please provide "
-           << "the context in the parameter space: " << SpaceStr << ".\n";
-    return;
-  }
-
-  for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) {
-    std::string NameContext = Context.get_dim_name(isl::dim::param, i);
-    std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
-
-    if (NameContext != NameUserContext) {
-      std::string SpaceStr = Space.to_str();
-      errs() << "Error: the name of dimension " << i
-             << " provided in -polly-context "
-             << "is '" << NameUserContext << "', but the name in the computed "
-             << "context is '" << NameContext
-             << "'. Due to this name mismatch, "
-             << "the -polly-context option is ignored. Please provide "
-             << "the context in the parameter space: " << SpaceStr << ".\n";
-      return;
-    }
-
-    UserContext = UserContext.set_dim_id(isl::dim::param, i,
-                                         Space.get_dim_id(isl::dim::param, i));
-  }
-
-  Context = Context.intersect(UserContext);
-}
-
 void Scop::buildContext() {
   isl::space Space = isl::space::params_alloc(getIslCtx(), 0);
   Context = isl::set::universe(Space);

From f4c2d57f767d870b4787c86b543ded8076fe108b Mon Sep 17 00:00:00 2001
From: Ben Hamilton <benhamilton@google.com>
Date: Tue, 16 Jul 2019 21:29:40 +0000
Subject: [PATCH 292/451] [clang-format] Don't detect call to ObjC class method
 as C++11 attribute specifier

Summary:
Previously, clang-format detected something like the following as a C++11 attribute specifier.

  @[[NSArray class]]

instead of an array with an Objective-C method call inside. In general, when the attribute specifier checking runs, if it sees 2 identifiers in a row, it decides that the square brackets represent an Objective-C method call. However, here, `class` is tokenized as a keyword instead of an identifier, so this check fails.

To fix this, the attribute specifier first checks whether the first square bracket has an "@" before it. If it does, then that square bracket is not the start of an attribute specifier because it is an Objective-C array literal. (The assumption is that @[[.*]] is not valid C/C++.)

Contributed by rkgibson2.

Reviewers: benhamilton

Reviewed By: benhamilton

Subscribers: aaron.ballman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64632

llvm-svn: 366267
---
 clang/lib/Format/TokenAnnotator.cpp   | 6 +++++-
 clang/unittests/Format/FormatTest.cpp | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 6b698e24b5e50..490c4f46135e2 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -388,6 +388,10 @@ class AnnotatingParser {
   bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
     if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
       return false;
+    // The first square bracket is part of an ObjC array literal
+    if (Tok.Previous && Tok.Previous->is(tok::at)) {
+      return false;
+    }
     const FormatToken *AttrTok = Tok.Next->Next;
     if (!AttrTok)
       return false;
@@ -400,7 +404,7 @@ class AnnotatingParser {
     while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
       // ObjC message send. We assume nobody will use : in a C++11 attribute
       // specifier parameter, although this is technically valid:
-      // [[foo(:)]]
+      // [[foo(:)]].
       if (AttrTok->is(tok::colon) ||
           AttrTok->startsSequence(tok::identifier, tok::identifier) ||
           AttrTok->startsSequence(tok::r_paren, tok::identifier))
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index c4abad228d0a3..c1cec110137bd 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -7027,6 +7027,12 @@ TEST_F(FormatTest, UnderstandsSquareAttributes) {
   // On the other hand, we still need to correctly find array subscripts.
   verifyFormat("int a = std::vector<int>{1, 2, 3}[0];");
 
+  // Make sure that we do not mistake Objective-C method calls inside array
+  // literals for attributes, even if those method names are also keywords.
+  verifyFormat("@[ [foo bar] ];");
+  verifyFormat("@[ [NSArray class] ];");
+  verifyFormat("@[ [foo enum] ];");
+
   // Make sure we do not parse attributes as lambda introducers.
   FormatStyle MultiLineFunctions = getLLVMStyle();
   MultiLineFunctions.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;

From d746a210e16925d8c26bd8359598d95213712218 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 16 Jul 2019 21:30:41 +0000
Subject: [PATCH 293/451] [x86] use more phadd for reductions

This is part of what is requested by PR42023:
https://bugs.llvm.org/show_bug.cgi?id=42023

There's an extension needed for FP add, but exactly how we would specify
that using flags is not clear to me, so I left that as a TODO.
We're still missing patterns for partial reductions when the input vector
is 256-bit or 512-bit, but I think that's a failure of vector narrowing.
If we can reduce the widths, then this matching should work on those tests.

Differential Revision: https://reviews.llvm.org/D64760

llvm-svn: 366268
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 54 +++++++++++++++++++
 llvm/test/CodeGen/X86/phaddsub-extract.ll     | 44 ++++++---------
 .../CodeGen/X86/vector-reduce-add-widen.ll    | 22 +++-----
 llvm/test/CodeGen/X86/vector-reduce-add.ll    | 22 +++-----
 4 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 62499a28dff85..59540211d5495 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35624,6 +35624,57 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
   llvm_unreachable("All opcodes should return within switch");
 }
 
+/// Try to convert a vector reduction sequence composed of binops and shuffles
+/// into horizontal ops.
+static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG,
+                                            const X86Subtarget &Subtarget) {
+  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");
+  bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
+  if (!Subtarget.hasFastHorizontalOps() && !OptForSize)
+    return SDValue();
+  SDValue Index = ExtElt->getOperand(1);
+  if (!isNullConstant(Index))
+    return SDValue();
+
+  // TODO: Allow FADD with reduction and/or reassociation and no-signed-zeros.
+  ISD::NodeType Opc;
+  SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD});
+  if (!Rdx)
+    return SDValue();
+
+  EVT VT = ExtElt->getValueType(0);
+  EVT VecVT = ExtElt->getOperand(0).getValueType();
+  if (VecVT.getScalarType() != VT)
+    return SDValue();
+
+  unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;
+  SDLoc DL(ExtElt);
+
+  // 256-bit horizontal instructions operate on 128-bit chunks rather than
+  // across the whole vector, so we need an extract + hop preliminary stage.
+  // This is the only step where the operands of the hop are not the same value.
+  // TODO: We could extend this to handle 512-bit or even longer vectors.
+  if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.hasSSSE3()) ||
+      ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.hasSSE3())) {
+    unsigned NumElts = VecVT.getVectorNumElements();
+    SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
+    SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
+    VecVT = EVT::getVectorVT(*DAG.getContext(), VT, NumElts / 2);
+    Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Hi, Lo);
+  }
+  if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) &&
+      !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3()))
+    return SDValue();
+
+  // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0
+  assert(Rdx.getValueType() == VecVT && "Unexpected reduction match");
+  unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements());
+  for (unsigned i = 0; i != ReductionSteps; ++i)
+    Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx);
+
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
+}
+
 /// Detect vector gather/scatter index generation and convert it from being a
 /// bunch of shuffles and extracts into a somewhat faster sequence.
 /// For i686, the best sequence is apparently storing the value and loading
@@ -35710,6 +35761,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
   if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
     return MinMax;
 
+  if (SDValue V = combineReductionToHorizontal(N, DAG, Subtarget))
+    return V;
+
   if (SDValue V = scalarizeExtEltFP(N, DAG))
     return V;
 
diff --git a/llvm/test/CodeGen/X86/phaddsub-extract.ll b/llvm/test/CodeGen/X86/phaddsub-extract.ll
index e81952d331c25..2a7039e932c36 100644
--- a/llvm/test/CodeGen/X86/phaddsub-extract.ll
+++ b/llvm/test/CodeGen/X86/phaddsub-extract.ll
@@ -1903,10 +1903,8 @@ define i16 @hadd16_8(<8 x i16> %x223) {
 ;
 ; SSE3-FAST-LABEL: hadd16_8:
 ; SSE3-FAST:       # %bb.0:
-; SSE3-FAST-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE3-FAST-NEXT:    paddw %xmm0, %xmm1
-; SSE3-FAST-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-FAST-NEXT:    paddw %xmm1, %xmm0
+; SSE3-FAST-NEXT:    phaddw %xmm0, %xmm0
+; SSE3-FAST-NEXT:    phaddw %xmm0, %xmm0
 ; SSE3-FAST-NEXT:    phaddw %xmm0, %xmm0
 ; SSE3-FAST-NEXT:    movd %xmm0, %eax
 ; SSE3-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1926,10 +1924,8 @@ define i16 @hadd16_8(<8 x i16> %x223) {
 ;
 ; AVX-FAST-LABEL: hadd16_8:
 ; AVX-FAST:       # %bb.0:
-; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -1956,10 +1952,9 @@ define i32 @hadd32_4(<4 x i32> %x225) {
 ;
 ; SSE3-FAST-LABEL: hadd32_4:
 ; SSE3-FAST:       # %bb.0:
-; SSE3-FAST-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE3-FAST-NEXT:    paddd %xmm0, %xmm1
-; SSE3-FAST-NEXT:    phaddd %xmm1, %xmm1
-; SSE3-FAST-NEXT:    movd %xmm1, %eax
+; SSE3-FAST-NEXT:    phaddd %xmm0, %xmm0
+; SSE3-FAST-NEXT:    phaddd %xmm0, %xmm0
+; SSE3-FAST-NEXT:    movd %xmm0, %eax
 ; SSE3-FAST-NEXT:    retq
 ;
 ; AVX-SLOW-LABEL: hadd32_4:
@@ -1973,8 +1968,7 @@ define i32 @hadd32_4(<4 x i32> %x225) {
 ;
 ; AVX-FAST-LABEL: hadd32_4:
 ; AVX-FAST:       # %bb.0:
-; AVX-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX-FAST-NEXT:    retq
@@ -2097,10 +2091,8 @@ define i32 @hadd32_16(<16 x i32> %x225) {
 define i16 @hadd16_8_optsize(<8 x i16> %x223) optsize {
 ; SSE3-LABEL: hadd16_8_optsize:
 ; SSE3:       # %bb.0:
-; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE3-NEXT:    paddw %xmm0, %xmm1
-; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT:    paddw %xmm1, %xmm0
+; SSE3-NEXT:    phaddw %xmm0, %xmm0
+; SSE3-NEXT:    phaddw %xmm0, %xmm0
 ; SSE3-NEXT:    phaddw %xmm0, %xmm0
 ; SSE3-NEXT:    movd %xmm0, %eax
 ; SSE3-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -2108,10 +2100,8 @@ define i16 @hadd16_8_optsize(<8 x i16> %x223) optsize {
 ;
 ; AVX-LABEL: hadd16_8_optsize:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -2129,16 +2119,14 @@ define i16 @hadd16_8_optsize(<8 x i16> %x223) optsize {
 define i32 @hadd32_4_optsize(<4 x i32> %x225) optsize {
 ; SSE3-LABEL: hadd32_4_optsize:
 ; SSE3:       # %bb.0:
-; SSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE3-NEXT:    paddd %xmm0, %xmm1
-; SSE3-NEXT:    phaddd %xmm1, %xmm1
-; SSE3-NEXT:    movd %xmm1, %eax
+; SSE3-NEXT:    phaddd %xmm0, %xmm0
+; SSE3-NEXT:    phaddd %xmm0, %xmm0
+; SSE3-NEXT:    movd %xmm0, %eax
 ; SSE3-NEXT:    retq
 ;
 ; AVX-LABEL: hadd32_4_optsize:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll
index b886a745edc1c..6dc5a2b54b506 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll
@@ -254,8 +254,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ;
 ; AVX1-FAST-LABEL: test_v4i32:
 ; AVX1-FAST:       # %bb.0:
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX1-FAST-NEXT:    retq
@@ -307,9 +306,8 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX1-FAST-LABEL: test_v8i32:
 ; AVX1-FAST:       # %bb.0:
 ; AVX1-FAST-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm1, %xmm0
+; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX1-FAST-NEXT:    vzeroupper
@@ -635,10 +633,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ;
 ; AVX1-FAST-LABEL: test_v8i16:
 ; AVX1-FAST:       # %bb.0:
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX1-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -704,11 +700,9 @@ define i16 @test_v16i16(<16 x i16> %a0) {
 ; AVX1-FAST-LABEL: test_v16i16:
 ; AVX1-FAST:       # %bb.0:
 ; AVX1-FAST-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm1, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX1-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/vector-reduce-add.ll b/llvm/test/CodeGen/X86/vector-reduce-add.ll
index 02fb375a318f5..630299a1824e0 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-add.ll
@@ -241,8 +241,7 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ;
 ; AVX1-FAST-LABEL: test_v4i32:
 ; AVX1-FAST:       # %bb.0:
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX1-FAST-NEXT:    retq
@@ -294,9 +293,8 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX1-FAST-LABEL: test_v8i32:
 ; AVX1-FAST:       # %bb.0:
 ; AVX1-FAST-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm1, %xmm0
+; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX1-FAST-NEXT:    vzeroupper
@@ -605,10 +603,8 @@ define i16 @test_v8i16(<8 x i16> %a0) {
 ;
 ; AVX1-FAST-LABEL: test_v8i16:
 ; AVX1-FAST:       # %bb.0:
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX1-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -674,11 +670,9 @@ define i16 @test_v16i16(<16 x i16> %a0) {
 ; AVX1-FAST-LABEL: test_v16i16:
 ; AVX1-FAST:       # %bb.0:
 ; AVX1-FAST-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-FAST-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm1, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-FAST-NEXT:    vmovd %xmm0, %eax
 ; AVX1-FAST-NEXT:    # kill: def $ax killed $ax killed $eax

From 607cd44bdc6a5117241fdc59191bf78da7466b0c Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Tue, 16 Jul 2019 21:34:59 +0000
Subject: [PATCH 294/451] [ORC][docs] Trim ORCv1 to ORCv2 transition section,
 add a how-to section.

llvm-svn: 366269
---
 llvm/docs/ORCv2.rst | 409 +++++++++++++++++++++++++++++++-------------
 1 file changed, 288 insertions(+), 121 deletions(-)

diff --git a/llvm/docs/ORCv2.rst b/llvm/docs/ORCv2.rst
index 4daa12f560869..2f37a2fbdad59 100644
--- a/llvm/docs/ORCv2.rst
+++ b/llvm/docs/ORCv2.rst
@@ -2,6 +2,9 @@
 ORC Design and Implementation
 ===============================
 
+.. contents::
+   :local:
+
 Introduction
 ============
 
@@ -9,9 +12,6 @@ This document aims to provide a high-level overview of the design and
 implementation of the ORC JIT APIs. Except where otherwise stated, all
 discussion applies to the design of the APIs as of LLVM verison 9 (ORCv2).
 
-.. contents::
-   :local:
-
 Use-cases
 =========
 
@@ -158,7 +158,7 @@ common symbol definitions.
 
 To see how this works, imagine a program ``foo`` which links against a pair
 of dynamic libraries: ``libA`` and ``libB``. On the command line, building this
-system might look like:
+program might look like:
 
 .. code-block:: bash
 
@@ -196,29 +196,30 @@ checking omitted for brevity) as:
   auto MainSym = ExitOnErr(ES.lookup({&ES.getMainJITDylib()}, "main"));
   auto *Main = (int(*)(int, char*[]))MainSym.getAddress();
 
-  int Result = Main(...);
-
+  int Result = Main(...);
 
 This example tells us nothing about *how* or *when* compilation will happen.
-That will depend on the implementation of the hypothetical CXXCompilingLayer,
-but the linking rules will be the same regardless. For example, if a1.cpp and
-a2.cpp both define a function "foo" the API should generate a duplicate
-definition error. On the other hand, if a1.cpp and b1.cpp both define "foo"
-there is no error (different dynamic libraries may define the same symbol). If
-main.cpp refers to "foo", it should bind to the definition in LibA rather than
-the one in LibB, since main.cpp is part of the "main" dylib, and the main dylib
-links against LibA before LibB.
+That will depend on the implementation of the hypothetical CXXCompilingLayer.
+The same linker-based symbol resolution rules will apply regardless of that
+implementation, however. For example, if a1.cpp and a2.cpp both define a
+function "foo" then ORCv2 will generate a duplicate definition error. On the
+other hand, if a1.cpp and b1.cpp both define "foo" there is no error (different
+dynamic libraries may define the same symbol). If main.cpp refers to "foo", it
+should bind to the definition in LibA rather than the one in LibB, since
+main.cpp is part of the "main" dylib, and the main dylib links against LibA
+before LibB.
 
 Many JIT clients will have no need for this strict adherence to the usual
-ahead-of-time linking rules and should be able to get by just fine by putting
+ahead-of-time linking rules, and should be able to get by just fine by putting
 all of their code in a single JITDylib. However, clients who want to JIT code
 for languages/projects that traditionally rely on ahead-of-time linking (e.g.
 C++) will find that this feature makes life much easier.
 
-Symbol lookup in ORC serves two other important functions, beyond basic lookup:
-(1) It triggers compilation of the symbol(s) searched for, and (2) it provides
-the synchronization mechanism for concurrent compilation. The pseudo-code for
-the lookup process is:
+Symbol lookup in ORC serves two other important functions, beyond providing
+addresses for symbols: (1) It triggers compilation of the symbol(s) searched for
+(if they have not been compiled already), and (2) it provides the
+synchronization mechanism for concurrent compilation. The pseudo-code for the
+lookup process is:
 
 .. code-block:: none
 
@@ -229,13 +230,13 @@ the lookup process is:
   dispatch materializers (if any)
 
 In this context a materializer is something that provides a working definition
-of a symbol upon request. Generally materializers wrap compilers, but they may
-also wrap a linker directly (if the program representation backing the
-definitions is an object file), or even just a class that writes bits directly
-into memory (if the definitions are stubs). Materialization is the blanket term
-for any actions (compiling, linking, splatting bits, registering with runtimes,
-etc.) that is requried to generate a symbol definition that is safe to call or
-access.
+of a symbol upon request. Usually materializers are just wrappers for compilers,
+but they may also wrap a jit-linker directly (if the program representation
+backing the definitions is an object file), or may even be a class that writes
+bits directly into memory (for example, if the definitions are
+stubs). Materialization is the blanket term for any actions (compiling, linking,
+splatting bits, registering with runtimes, etc.) that are required to generate a
+symbol definition that is safe to call or access.
 
 As each materializer completes its work it notifies the JITDylib, which in turn
 notifies any query objects that are waiting on the newly materialized
@@ -314,126 +315,292 @@ TBD.
 Transitioning from ORCv1 to ORCv2
 =================================
 
-Since LLVM 7.0 new ORC developement has focused on adding support for concurrent
-compilation. In order to enable concurrency new APIs were introduced
-(ExecutionSession, JITDylib, etc.) and new implementations of existing layers
-were written. In LLVM 8.0 the old layer implementations, which do not support
-concurrency, were renamed (with a "Legacy" prefix), but remained in tree.  In
-LLVM 9.0 we have added a deprecation warning for the old layers and utilities,
-and in LLVM 10.0 the old layers and utilities will be removed.
+Since LLVM 7.0, new ORC development work has focused on adding support for
+concurrent JIT compilation. The new APIs (including new layer interfaces and
+implementations, and new utilities) that support concurrency are collectively
+referred to as ORCv2, and the original, non-concurrent layers and utilities
+are now referred to as ORCv1.
+
+The majority of the ORCv1 layers and utilities were renamed with a 'Legacy'
+prefix in LLVM 8.0, and have deprecation warnings attached in LLVM 9.0. In LLVM
+10.0 ORCv1 will be removed entirely.
+
+Transitioning from ORCv1 to ORCv2 should be easy for most clients. Most of the
+ORCv1 layers and utilities have ORCv2 counterparts[2]_ that can be directly
+substituted. However there are some design differences between ORCv1 and ORCv2
+to be aware of:
+
+  1. ORCv2 fully adopts the JIT-as-linker model that began with MCJIT. Modules
+     (and other program representations, e.g. Object Files)  are no longer added
+     directly to JIT classes or layers. Instead, they are added to ``JITDylib``
+     instances *by* layers. The ``JITDylib`` determines *where* the definitions
+     reside, the layers determine *how* the definitions will be compiled.
+     Linkage relationships between ``JITDylibs`` determine how inter-module
+     references are resolved, and symbol resolvers are no longer used. See the
+     section `Design Overview`_ for more details.
+
+     Unless multiple JITDylibs are needed to model linkage relationships, ORCv1
+     clients should place all code in the main JITDylib (returned by
+     ``ExecutionSession::getMainJITDylib()``). MCJIT clients should use LLJIT
+     (see `LLJIT and LLLazyJIT`_).
+
+  2. All JIT stacks now need an ``ExecutionSession`` instance. ExecutionSession
+     manages the string pool, error reporting, synchronization, and symbol
+     lookup.
+
+  3. ORCv2 uses uniqued strings (``SymbolStringPtr`` instances) rather than
+     string values in order to reduce memory overhead and improve lookup
+     performance. See the subsection `How to manage symbol strings`_.
 
-Clients currently using the legacy (ORCv1) layers and utilities will usually
-find it easy to transition to the newer (ORCv2) variants. Most of the ORCv1
-layers and utilities have ORCv2 counterparts[2]_ that can be
-substituted. However there are some differences between ORCv1 and ORCv2 to be
-aware of:
-
-  1. All JIT stacks now need an ExecutionSession instance which manages the
-     string pool, error reporting, synchronization, and symbol lookup.
+  4. IR layers require ThreadSafeModule instances, rather than
+     std::unique_ptr<Module>s. ThreadSafeModule is a wrapper that ensures that
+     Modules that use the same LLVMContext are not accessed concurrently.
+     See `How to use ThreadSafeModule and ThreadSafeContext`_.
 
-  2. ORCv2 uses uniqued strings (``SymbolStringPtr`` instances) to reduce memory
-     overhead and improve lookup performance. To get a uniqued string, call
-     ``intern`` on your ExecutionSession instance:
+  5. Symbol lookup is no longer handled by layers. Instead, there is a
+     ``lookup`` method on ExecutionSession that takes a list of JITDylibs to scan.
 
      .. code-block:: c++
 
        ExecutionSession ES;
+       JITDylib &JD1 = ...;
+       JITDylib &JD2 = ...;
 
-       /// ...
+       auto Sym = ES.lookup({&JD1, &JD2}, ES.intern("_main"));
 
-       auto MainSymbolName = ES.intern("main");
+  6. Module removal is not yet supported. There is no equivalent of the
+     layer concept removeModule/removeObject methods. Work on resource tracking
+     and removal in ORCv2 is ongoing.
 
-  3. Program representations (Modules, Object Files, etc.) are no longer added
-     *to* layers. Instead they are added *to* JITDylibs *by* layers. The layer
-     determines how the program representation will be compiled if it is needed.
-     The JITDylib provides the symbol table, enforces linkage rules (e.g.
-     rejecting duplicate definitions), and synchronizes concurrent compiles.
+For code examples and suggestions of how to use the ORCv2 APIs, please see
+the section `How-tos`_.
 
-     Most ORCv1 clients (or MCJIT clients wanting to try out ORCv2) should
-     simply add code to the default *main* JITDylib provided by the
-     ExecutionSession:
+How-tos
+=======
 
-     .. code-block:: c++
+How to manage symbol strings
+############################
 
-       ExecutionSession ES;
-       RTDyldObjectLinkingLayer ObjLinkingLayer(
-         ES, []() { return llvm::make_unique<SectionMemoryManager>(); });
-       IRCompileLayer CompileLayer(ES, ObjLinkingLayer, SimpleIRCompiler(TM));
+Symbol strings in ORC are uniqued to improve lookup performance, reduce memory
+overhead, and allow symbol names to function as efficient keys. To get the
+unique ``SymbolStringPtr`` for a string value, call the
+``ExecutionSession::intern`` method:
 
-       auto M = loadModule(...);
+  .. code-block:: c++
 
-       if (auto Err = CompileLayer.add(ES.getMainJITDylib(), M))
-         return Err;
+    ExecutionSession ES;
+    /// ...
+    auto MainSymbolName = ES.intern("main");
 
-  4. IR layers require ThreadSafeModule instances, rather than
-     std::unique_ptr<Module>s. A ThreadSafeModule instance is a pair of a
-     std::unique_ptr<Module> and a ThreadSafeContext, which is in turn a
-     pair of a std::unique_ptr<LLVMContext> and a lock. This allows the JIT
-     to ensure that the LLVMContext for a module is locked before the module
-     is accessed. Multiple ThreadSafeModules may share a ThreadSafeContext
-     value, but in that case the modules will not be able to be compiled
-     concurrently[3]_.
+If you wish to perform lookup using the C/IR name of a symbol you will also
+need to apply the platform linker-mangling before interning the string. On
+Linux this mangling is a no-op, but on other platforms it usually involves
+adding a prefix to the string (e.g. '_' on Darwin). The mangling scheme is
+based on the DataLayout for the target. Given a DataLayout and an
+ExecutionSession, you can create a MangleAndInterner function object that
+will perform both jobs for you:
 
-     ThreadSafeContexts may be constructed explicitly:
+  .. code-block:: c++
 
-     .. code-block:: c++
+    ExecutionSession ES;
+    const DataLayout &DL = ...;
+    MangleAndInterner Mangle(ES, DL);
 
-       // ThreadSafeContext shared between two modules.
-       ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>());
-       ThreadSafeModule TSM1(
-         llvm::make_unique<Module>("M1", *TSCtx.getContext()), TSCtx);
-       ThreadSafeModule TSM2(
-         llvm::make_unique<Module>("M2", *TSCtx.getContext()), TSCtx);
+    // ...
 
-     , or they can be created implicitly by passing a new LLVMContext to the
-     ThreadSafeModuleConstructor:
+    // Portable IR-symbol-name lookup:
+    auto Sym = ES.lookup({&ES.getMainJITDylib()}, Mangle("main"));
 
-     .. code-block:: c++
+How to create JITDylibs and set up linkage relationships
+########################################################
 
-       // Constructing a ThreadSafeModule (and implicitly a ThreadSafeContext)
-       // from a pair of a Module and a Context.
-       auto Ctx = llvm::make_unique<LLVMContext>();
-       auto M = llvm::make_unique<Module>("M", *Ctx);
-       return ThreadSafeModule(std::move(M), std::move(Ctx));
-
-  5. The symbol resolution and lookup scheme have been fundamentally changed.
-     Symbol lookup has been removed from the layer interface. Instead,
-     symbols are looked up via the ``ExecutionSession::lookup`` method by
-     scanning a list of JITDylibs.
-
-     SymbolResolvers have been removed entirely. Resolution rules now follow the
-     linkage relationship between JITDylibs. For example, to resolve a reference
-     to a symbol *F* from a module *M* that has been added to JITDylib *J1* we
-     would first search for a definition of *F* in *J1* then (if no definition
-     was found) search each of the JITDylibs that *J1* links against.
-
-     While the new resolution scheme is, strictly speaking, less flexible than
-     the old scheme of customizable resolvers this has not yet led to problems
-     in practice. Instead, using standard linker rules has removed a lot of
-     boilerplate while providing correct[4]_ behavior for common and weak symbols.
-
-     One notable difference is in exposing in-process symbols to the JIT. To
-     support this (without requiring the set of symbols to be enumerated up
-     front), JITDylibs allow for a *GeneratorFunction* to be attached to
-     generate new definitions upon lookup. Reflecting the processes symbols into
-     the JIT can be done by writing:
+In ORC, all symbol definitions reside in JITDylibs. JITDylibs are created by
+calling the ``ExecutionSession::createJITDylib`` method with a unique name:
 
-     .. code-block:: c++
+  .. code-block:: c++
 
-       ExecutionSession ES;
-       const auto DataLayout &DL = ...;
+    ExecutionSession ES;
+    auto &JD = ES.createJITDylib("libFoo.dylib");
 
-       {
-         auto ProcessSymbolsGenerator =
-           DynamicLibrarySearchGenerator::GetForCurrentProcess(DL.getGlobalPrefix());
-         if (!ProcessSymbolsGenerator)
-           return ProcessSymbolsGenerator.takeError();
-         ES.getMainJITDylib().setGenerator(std::move(*ProcessSymbolsGenerator));
-       }
+The JITDylib is owned by the ``ExecutionSession`` instance and will be freed
+when it is destroyed.
 
-  6. Module removal is not yet supported. There is no equivalent of the
-     layer concept removeModule/removeObject methods. Work on resource tracking
-     and removal in ORCv2 is ongoing.
+A JITDylib representing the JIT main program is created by ExecutionSession by
+default. A reference to it can be obtained by calling
+``ExecutionSession::getMainJITDylib()``:
+
+  .. code-block:: c++
+
+    ExecutionSession ES;
+    auto &MainJD = ES.getMainJITDylib();
+
+How to use ThreadSafeModule and ThreadSafeContext
+#################################################
+
+ThreadSafeModule and ThreadSafeContext are wrappers around Modules and
+LLVMContexts respectively. A ThreadSafeModule is a pair of a
+std::unique_ptr<Module> and a (possibly shared) ThreadSafeContext value. A
+ThreadSafeContext is a pair of a std::unique_ptr<LLVMContext> and a lock.
+This design serves two purposes: providing both a locking scheme and lifetime
+management for LLVMContexts. The ThreadSafeContext may be locked to prevent
+accidental concurrent access by two Modules that use the same LLVMContext.
+The underlying LLVMContext is freed once all ThreadSafeContext values pointing
+to it are destroyed, allowing the context memory to be reclaimed as soon as
+the Modules referring to it are destroyed.
+
+ThreadSafeContexts can be explicitly constructed from a
+std::unique_ptr<LLVMContext>:
+
+  .. code-block:: c++
+    ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>());
+
+ThreadSafeModules can be constructed from a pair of a std::unique_ptr<Module>
+and a ThreadSafeContext value. ThreadSafeContext values may be shared between
+multiple ThreadSafeModules:
+
+  .. code-block:: c++
+
+    ThreadSafeModule TSM1(
+      llvm::make_unique<Module>("M1", *TSCtx.getContext()), TSCtx);
+
+    ThreadSafeModule TSM2(
+      llvm::make_unique<Module>("M2", *TSCtx.getContext()), TSCtx);
+
+Before using a ThreadSafeContext, clients should ensure that either the context
+is only accessible on the current thread, or that the context is locked. In the
+example above (where the context is never locked) we rely on the fact that
+``TSM1``, ``TSM2``, and ``TSCtx`` are all created on one thread. If a context is
+going to be shared between threads then it must be locked before the context,
+or any Modules attached to it, are accessed. When code is added to in-tree IR
+layers this locking is done automatically by the
+``BasicIRLayerMaterializationUnit::materialize`` method. In all other
+situations, for example when writing a custom IR materialization unit, or
+constructing a new ThreadSafeModule from higher-level program representations,
+locking must be done explicitly:
+
+  .. code-block:: c++
+
+    void HighLevelRepresentationLayer::emit(MaterializationResponsibility R,
+                                            HighLevelProgramRepresentation H) {
+      // Get or create a context value that may be shared between threads.
+      ThreadSafeContext TSCtx = getContext();
+
+      // Lock the context to prevent concurrent access.
+      auto Lock = TSCtx.getLock();
+
+      // IRGen a module onto the locked Context.
+      ThreadSafeModule TSM(IRGen(H, *TSCtx.getContext()), TSCtx);
+
+      // Emit the module to the base layer with the context still locked.
+      BaseIRLayer.emit(std::move(R), std::move(TSM));
+    }
+
+Clients wishing to maximize possibilities for concurrent compilation will want
+to create every new ThreadSafeModule on a new ThreadSafeContext. For this reason
+a convenience constructor for ThreadSafeModule is provided that implicitly
+constructs a new ThreadSafeContext value from a std::unique_ptr<LLVMContext>:
+
+  .. code-block:: c++
+
+    // Maximize concurrency opportunities by loading every module on a
+    // separate context.
+    for (const auto &IRPath : IRPaths) {
+      auto Ctx = llvm::make_unique<LLVMContext>();
+      auto M = llvm::make_unique<Module>("M", *Ctx);
+      CompileLayer.add(ES.getMainJITDylib(),
+                       ThreadSafeModule(std::move(M), std::move(Ctx)));
+    }
+
+Clients who plan to run single-threaded may choose to save memory by loading
+all modules on the same context:
+
+  .. code-block:: c++
+
+    // Save memory by using one context for all Modules:
+    ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>());
+    for (const auto &IRPath : IRPaths) {
+      ThreadSafeModule TSM(parsePath(IRPath, *TSCtx.getContext()), TSCtx);
+      CompileLayer.add(ES.getMainJITDylib(), std::move(TSM));
+    }
+
+How to Add Process and Library Symbols to the JITDylibs
+#######################################################
+
+JIT'd code typically needs access to symbols in the host program or in
+supporting libraries. References to process symbols can be "baked in" to code
+as it is compiled by turning external references into pre-resolved integer
+constants, however this ties the JIT'd code to the current process's virtual
+memory layout (meaning that it can not be cached between runs) and makes
+debugging lower level program representations difficult (as all external
+references are opaque integer values). A better solution is to maintain symbolic
+external references and let the jit-linker bind them for you at runtime. To
+allow the JIT linker to find these external definitions their addresses must
+be added to a JITDylib that the JIT'd definitions link against.
+
+Adding definitions for external symbols could be done using the absoluteSymbols
+function:
+
+  .. code-block:: c++
+
+    const DataLayout &DL = getDataLayout();
+    MangleAndInterner Mangle(ES, DL);
+
+    auto &JD = ES.getMainJITDylib();
+
+    JD.define(
+      absoluteSymbols({
+        { Mangle("puts"), pointerToJITTargetAddress(&puts)},
+        { Mangle("gets"), pointerToJITTargetAddress(&gets)}
+      }));
+
+Manually adding absolute symbols for a large or changing interface is cumbersome
+however, so ORC provides an alternative to generate new definitions on demand:
+*definition generators*. If a definition generator is attached to a JITDylib,
+then any unsuccessful lookup on that JITDylib will fall back to calling the
+definition generator, and the definition generator may choose to generate a new
+definition for the missing symbols. Of particular use here is the
+``DynamicLibrarySearchGenerator`` utility. This can be used to reflect the whole
+exported symbol set of the process or a specific dynamic library, or a subset
+of either of these determined by a predicate.
+
+For example, to load the whole interface of a runtime library:
+
+  .. code-block:: c++
+
+    const DataLayout &DL = getDataLayout();
+    auto &JD = ES.getMainJITDylib();
+
+    JD.setGenerator(DynamicLibrarySearchGenerator::Load("/path/to/lib",
+                                                        DL.getGlobalPrefix()));
+
+    // IR added to JD can now link against all symbols exported by the library
+    // at '/path/to/lib'.
+    CompileLayer.add(JD, loadModule(...));
+
+Or, to expose a whitelisted set of symbols from the main process:
+
+  .. code-block:: c++
+
+    const DataLayout &DL = getDataLayout();
+    MangleAndInterner Mangle(ES, DL);
+
+    auto &JD = ES.getMainJITDylib();
+
+    DenseSet<SymbolStringPtr> Whitelist({
+        Mangle("puts"),
+        Mangle("gets")
+      });
+
+    // Use GetForCurrentProcess with a predicate function that checks the
+    // whitelist.
+    JD.setGenerator(
+      DynamicLibrarySearchGenerator::GetForCurrentProcess(
+        DL.getGlobalPrefix(),
+        [&](const SymbolStringPtr &S) { return Whitelist.count(S); }));
+
+    // IR added to JD can now link against any symbols exported by the process
+    // and contained in the whitelist.
+    CompileLayer.add(JD, loadModule(...));
 
 Future Features
 ===============

From c23619b0c90056f9bd63f9b5d79caf5bf63618e8 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Tue, 16 Jul 2019 21:41:43 +0000
Subject: [PATCH 295/451] [ORC][docs] Fix an RST error: the code-block
 directive needs a newline after it.

llvm-svn: 366270
---
 llvm/docs/ORCv2.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/docs/ORCv2.rst b/llvm/docs/ORCv2.rst
index 2f37a2fbdad59..7423c041d40a5 100644
--- a/llvm/docs/ORCv2.rst
+++ b/llvm/docs/ORCv2.rst
@@ -452,6 +452,7 @@ ThreadSafeContexts can be explicitly constructed from a
 std::unique_ptr<LLVMContext>:
 
   .. code-block:: c++
+
     ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>());
 
 ThreadSafeModules can be constructed from a pair of a std::unique_ptr<Module>

From 21f2858dcf3a556f01f6ae151bf7638b70f01c02 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 22:00:10 +0000
Subject: [PATCH 296/451] AMDGPU: Partially revert r366250

GCCBuiltin doesn't work for these, because they have a mangled type
(although they arguably should not).

llvm-svn: 366271
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 4c67cdea4d580..1f835171386f7 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1249,15 +1249,12 @@ def int_amdgcn_ds_swizzle :
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
             [IntrNoMem, IntrConvergent, ImmArg<1>]>;
 
-def int_amdgcn_ubfe :
-  GCCBuiltin<"__builtin_amdgcn_ubfe">,
-  Intrinsic<[llvm_anyint_ty],
+def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
     [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
     [IntrNoMem, IntrSpeculatable]
 >;
 
-def int_amdgcn_sbfe : GCCBuiltin<"__builtin_amdgcn_sbfe">,
-  Intrinsic<[llvm_anyint_ty],
+def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
     [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty],
     [IntrNoMem, IntrSpeculatable]
 >;

From 42bba4b852b1a63db4043798bba7d9fcea61cbaf Mon Sep 17 00:00:00 2001
From: Guanzhong Chen <gzchen@google.com>
Date: Tue, 16 Jul 2019 22:00:45 +0000
Subject: [PATCH 297/451] [WebAssembly] Implement thread-local storage
 (local-exec model)

Summary:
Thread local variables are placed inside a `.tdata` segment. Their symbols are
offsets from the start of the segment. The address of a thread local variable
is computed as `__tls_base` + the offset from the start of the segment.

`.tdata` segment is a passive segment and `memory.init` is used once per thread
to initialize the thread local storage.

`__tls_base` is a wasm global. Since each thread has its own wasm instance,
it is effectively thread local. Currently, `__tls_base` must be initialized
at thread startup, and so cannot be used with dynamic libraries.

`__tls_base` is to be initialized with a new linker-synthesized function,
`__wasm_init_tls`, which takes as an argument a block of memory to use as the
storage for thread locals. It then initializes the block of memory and sets
`__tls_base`. As `__wasm_init_tls` will handle the memory initialization,
the memory does not have to be zeroed.

To help allocate memory for thread-local storage, a new compiler intrinsic
is introduced: `__builtin_wasm_tls_size()`. This intrinsic function returns
the size of the thread-local storage for the current function.

The expected usage is to run something like the following upon thread startup:

    __wasm_init_tls(malloc(__builtin_wasm_tls_size()));

Reviewers: tlively, aheejin, kripken, sbc100

Subscribers: dschuff, jgravelle-google, hiraditya, sunfish, jfb, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D64537

llvm-svn: 366272
---
 .../clang/Basic/BuiltinsWebAssembly.def       |  3 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  5 ++
 clang/test/CodeGen/builtins-wasm.c            |  6 ++
 lld/test/wasm/data-segments.ll                | 20 +++--
 lld/test/wasm/tls.ll                          | 81 ++++++++++++++++++
 lld/wasm/Driver.cpp                           | 25 ++++++
 lld/wasm/Symbols.cpp                          | 11 ++-
 lld/wasm/Symbols.h                            | 13 +++
 lld/wasm/Writer.cpp                           | 81 +++++++++++++++++-
 llvm/include/llvm/BinaryFormat/Wasm.h         |  2 +
 llvm/include/llvm/IR/IntrinsicsWebAssembly.td |  9 ++
 llvm/include/llvm/MC/MCSectionWasm.h          |  3 +-
 .../WebAssembly/WebAssemblyFastISel.cpp       |  4 +
 .../WebAssembly/WebAssemblyISelDAGToDAG.cpp   | 49 +++++++++++
 .../WebAssembly/WebAssemblyMCInstLower.cpp    |  8 +-
 .../WebAssembly/WebAssemblyTargetMachine.cpp  | 23 +++--
 .../WebAssembly/target-features-tls.ll        | 32 +++----
 llvm/test/CodeGen/WebAssembly/tls.ll          | 85 ++++++++++++++++---
 18 files changed, 413 insertions(+), 47 deletions(-)
 create mode 100644 lld/test/wasm/tls.ll

diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index 57ebb27ab4695..63177f016ac77 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -29,6 +29,9 @@ BUILTIN(__builtin_wasm_memory_grow, "zIiz", "n")
 TARGET_BUILTIN(__builtin_wasm_memory_init, "vIUiIUiv*UiUi", "", "bulk-memory")
 TARGET_BUILTIN(__builtin_wasm_data_drop, "vIUi", "", "bulk-memory")
 
+// Thread-local storage
+TARGET_BUILTIN(__builtin_wasm_tls_size, "z", "nc", "bulk-memory")
+
 // Floating point min/max
 BUILTIN(__builtin_wasm_min_f32, "fff", "nc")
 BUILTIN(__builtin_wasm_max_f32, "fff", "nc")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index acaa81ae8a9a6..1658be5a88e02 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13913,6 +13913,11 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop);
     return Builder.CreateCall(Callee, {Arg});
   }
+  case WebAssembly::BI__builtin_wasm_tls_size: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
+    return Builder.CreateCall(Callee);
+  }
   case WebAssembly::BI__builtin_wasm_throw: {
     Value *Tag = EmitScalarExpr(E->getArg(0));
     Value *Obj = EmitScalarExpr(E->getArg(1));
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
index 4784d6ff86ebd..8a17fb39641b3 100644
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -38,6 +38,12 @@ void data_drop() {
   // WEBASSEMBLY64: call void @llvm.wasm.data.drop(i32 3)
 }
 
+__SIZE_TYPE__ tls_size() {
+  return __builtin_wasm_tls_size();
+  // WEBASSEMBLY32: call i32 @llvm.wasm.tls.size.i32()
+  // WEBASSEMBLY64: call i64 @llvm.wasm.tls.size.i64()
+}
+
 void throw(void *obj) {
   return __builtin_wasm_throw(0, obj);
   // WEBASSEMBLY32: call void @llvm.wasm.throw(i32 0, i8* %{{.*}})
diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll
index a9a403f3c5f10..944895a0d39cc 100644
--- a/lld/test/wasm/data-segments.ll
+++ b/lld/test/wasm/data-segments.ll
@@ -4,11 +4,11 @@
 
 ; atomics => active segments (TODO: error)
 ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 %t.atomics.o -o %t.atomics.wasm
-; RUN: obj2yaml %t.atomics.wasm | FileCheck %s --check-prefix ACTIVE
+; RUN: obj2yaml %t.atomics.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS
 
 ; atomics, active segments => active segments
 ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --active-segments %t.atomics.o -o %t.atomics.active.wasm
-; RUN: obj2yaml %t.atomics.active.wasm | FileCheck %s --check-prefix ACTIVE
+; RUN: obj2yaml %t.atomics.active.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS
 
 ; atomics, passive segments => error
 ; RUN: not wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --passive-segments %t.atomics.o -o %t.atomics.passive.wasm 2>&1 | FileCheck %s --check-prefix ERROR
@@ -27,15 +27,15 @@
 
 ; atomics, bulk memory => active segments (TODO: passive)
 ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.wasm
-; RUN: obj2yaml %t.atomics.bulk-mem.wasm | FileCheck %s --check-prefix ACTIVE
+; RUN: obj2yaml %t.atomics.bulk-mem.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS
 
 ; atomics, bulk memory, active segments   => active segments
 ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --active-segments %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.active.wasm
-; RUN: obj2yaml %t.atomics.bulk-mem.active.wasm | FileCheck %s --check-prefix ACTIVE
+; RUN: obj2yaml %t.atomics.bulk-mem.active.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS
 
 ; atomics, bulk memory, passive segments  => passive segments
 ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --passive-segments %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.passive.wasm
-; RUN: obj2yaml %t.atomics.bulk-mem.passive.wasm | FileCheck %s --check-prefix PASSIVE
+; RUN: obj2yaml %t.atomics.bulk-mem.passive.wasm | FileCheck %s --check-prefixes PASSIVE,PASSIVE-TLS
 
 target triple = "wasm32-unknown-unknown"
 
@@ -54,6 +54,9 @@ target triple = "wasm32-unknown-unknown"
 ; ACTIVE-NEXT:      - Index:           0
 ; ACTIVE-NEXT:        Locals:          []
 ; ACTIVE-NEXT:        Body:            0B
+; ACTIVE-TLS-NEXT:  - Index:           1
+; ACTIVE-TLS-NEXT:    Locals:          []
+; ACTIVE-TLS-NEXT:    Body:            0B
 ; ACTIVE-NEXT:  - Type:            DATA
 ; ACTIVE-NEXT:    Segments:
 ; ACTIVE-NEXT:      - SectionOffset:   7
@@ -80,6 +83,8 @@ target triple = "wasm32-unknown-unknown"
 ; ACTIVE-NEXT:    FunctionNames:
 ; ACTIVE-NEXT:      - Index:           0
 ; ACTIVE-NEXT:        Name:            __wasm_call_ctors
+; ACTIVE-TLS-NEXT:  - Index:           1
+; ACTIVE-TLS-NEXT:    Name:            __wasm_init_tls
 
 ; PASSIVE-LABEL: - Type:            CODE
 ; PASSIVE-NEXT:    Functions:
@@ -89,6 +94,9 @@ target triple = "wasm32-unknown-unknown"
 ; PASSIVE-NEXT:      - Index:           1
 ; PASSIVE-NEXT:        Locals:          []
 ; PASSIVE-NEXT:        Body:            41800841004114FC080000FC090041940841004190CE00FC080100FC090141A4D6004100410DFC080200FC09020B
+; PASSIVE-TLS-NEXT:  - Index:           2
+; PASSIVE-TLS-NEXT:    Locals:          []
+; PASSIVE-TLS-NEXT:    Body:            0B
 ; PASSIVE-NEXT:  - Type:            DATA
 ; PASSIVE-NEXT:    Segments:
 ; PASSIVE-NEXT:      - SectionOffset:   3
@@ -108,3 +116,5 @@ target triple = "wasm32-unknown-unknown"
 ; PASSIVE-NEXT:        Name:            __wasm_call_ctors
 ; PASSIVE-NEXT:      - Index:           1
 ; PASSIVE-NEXT:        Name:            __wasm_init_memory
+; PASSIVE-TLS-NEXT:  - Index:           2
+; PASSIVE-TLS-NEXT:    Name:            __wasm_init_tls
diff --git a/lld/test/wasm/tls.ll b/lld/test/wasm/tls.ll
new file mode 100644
index 0000000000000..b570d46756875
--- /dev/null
+++ b/lld/test/wasm/tls.ll
@@ -0,0 +1,81 @@
+; RUN: llc -mattr=+bulk-memory -filetype=obj %s -o %t.o
+
+target triple = "wasm32-unknown-unknown"
+
+@tls1 = thread_local(localexec) global i32 1, align 4
+@no_tls = global i32 0, align 4
+@tls2 = thread_local(localexec) global i32 1, align 4
+
+define i32* @tls1_addr() {
+  ret i32* @tls1
+}
+
+define i32* @tls2_addr() {
+  ret i32* @tls2
+}
+
+; RUN: wasm-ld -no-gc-sections --shared-memory --max-memory=131072 --no-entry -o %t.wasm %t.o
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; RUN: wasm-ld -no-gc-sections --shared-memory --max-memory=131072 --no-merge-data-segments --no-entry -o %t.wasm %t.o
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK:      - Type:            GLOBAL
+; CHECK-NEXT:   Globals:
+; CHECK-NEXT:     - Index:           0
+; CHECK-NEXT:       Type:            I32
+; CHECK-NEXT:       Mutable:         true
+; CHECK-NEXT:       InitExpr:
+; CHECK-NEXT:         Opcode:          I32_CONST
+; CHECK-NEXT:         Value:           66576
+; CHECK-NEXT:     - Index:           1
+; CHECK-NEXT:       Type:            I32
+; CHECK-NEXT:       Mutable:         true
+; CHECK-NEXT:       InitExpr:
+; CHECK-NEXT:         Opcode:          I32_CONST
+; CHECK-NEXT:         Value:           0
+; CHECK-NEXT:     - Index:           2
+; CHECK-NEXT:       Type:            I32
+; CHECK-NEXT:       Mutable:         false
+; CHECK-NEXT:       InitExpr:
+; CHECK-NEXT:         Opcode:          I32_CONST
+; CHECK-NEXT:         Value:           8
+
+
+; CHECK:      - Type:            CODE
+; CHECK-NEXT:   Functions:
+; CHECK-NEXT:     - Index:           0
+; CHECK-NEXT:       Locals:          []
+; CHECK-NEXT:       Body:            0B
+; CHECK-NEXT:     - Index:           1
+; CHECK-NEXT:       Locals:          []
+; CHECK-NEXT:       Body:            20002401200041004108FC0800000B
+
+; Expected body of __wasm_init_tls:
+;   local.get 0
+;   global.set  1
+;   local.get 0
+;   i32.const 0
+;   i32.const 8
+;   memory.init 0, 0
+;   end
+
+; CHECK-NEXT:     - Index:           2
+; CHECK-NEXT:       Locals:          []
+; CHECK-NEXT:       Body:            2381808080004180808080006A0B
+
+; Expected body of tls1_addr:
+;   global.get 1
+;   i32.const 0
+;   i32.add
+;   end
+
+; CHECK-NEXT:     - Index:           3
+; CHECK-NEXT:       Locals:          []
+; CHECK-NEXT:       Body:            2381808080004184808080006A0B
+
+; Expected body of tls2_addr:
+;   global.get 1
+;   i32.const 4
+;   i32.add
+;   end
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 3de69954e0a1a..900cd051dcfb8 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -454,6 +454,7 @@ createUndefinedGlobal(StringRef name, llvm::wasm::WasmGlobalType *type) {
 // Create ABI-defined synthetic symbols
 static void createSyntheticSymbols() {
   static WasmSignature nullSignature = {{}, {}};
+  static WasmSignature i32ArgSignature = {{}, {ValType::I32}};
   static llvm::wasm::WasmGlobalType globalTypeI32 = {WASM_TYPE_I32, false};
   static llvm::wasm::WasmGlobalType mutableGlobalTypeI32 = {WASM_TYPE_I32,
                                                             true};
@@ -516,6 +517,30 @@ static void createSyntheticSymbols() {
     WasmSym::heapBase = symtab->addOptionalDataSymbol("__heap_base");
   }
 
+  if (config->sharedMemory && !config->shared) {
+    llvm::wasm::WasmGlobal tlsBaseGlobal;
+    tlsBaseGlobal.Type = {WASM_TYPE_I32, true};
+    tlsBaseGlobal.InitExpr.Value.Int32 = 0;
+    tlsBaseGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST;
+    tlsBaseGlobal.SymbolName = "__tls_base";
+    WasmSym::tlsBase =
+        symtab->addSyntheticGlobal("__tls_base", WASM_SYMBOL_VISIBILITY_HIDDEN,
+                                   make<InputGlobal>(tlsBaseGlobal, nullptr));
+
+    llvm::wasm::WasmGlobal tlsSizeGlobal;
+    tlsSizeGlobal.Type = {WASM_TYPE_I32, false};
+    tlsSizeGlobal.InitExpr.Value.Int32 = 0;
+    tlsSizeGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST;
+    tlsSizeGlobal.SymbolName = "__tls_size";
+    WasmSym::tlsSize =
+        symtab->addSyntheticGlobal("__tls_size", WASM_SYMBOL_VISIBILITY_HIDDEN,
+                                   make<InputGlobal>(tlsSizeGlobal, nullptr));
+
+    WasmSym::initTLS = symtab->addSyntheticFunction(
+        "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN,
+        make<SyntheticFunction>(i32ArgSignature, "__wasm_init_tls"));
+  }
+
   WasmSym::dsoHandle = symtab->addSyntheticDataSymbol(
       "__dso_handle", WASM_SYMBOL_VISIBILITY_HIDDEN);
 }
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index 61868f37577fb..7d8d532c8ce16 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -27,11 +27,14 @@ using namespace lld::wasm;
 DefinedFunction *WasmSym::callCtors;
 DefinedFunction *WasmSym::initMemory;
 DefinedFunction *WasmSym::applyRelocs;
+DefinedFunction *WasmSym::initTLS;
 DefinedData *WasmSym::dsoHandle;
 DefinedData *WasmSym::dataEnd;
 DefinedData *WasmSym::globalBase;
 DefinedData *WasmSym::heapBase;
 GlobalSymbol *WasmSym::stackPointer;
+GlobalSymbol *WasmSym::tlsBase;
+GlobalSymbol *WasmSym::tlsSize;
 UndefinedGlobal *WasmSym::tableBase;
 UndefinedGlobal *WasmSym::memoryBase;
 
@@ -200,8 +203,14 @@ DefinedFunction::DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
 
 uint32_t DefinedData::getVirtualAddress() const {
   LLVM_DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n");
-  if (segment)
+  if (segment) {
+    // For thread local data, the symbol location is relative to the start of
+    // the .tdata section, since they are used as offsets from __tls_base.
+    // Hence, we do not add in segment->outputSeg->startVA.
+    if (segment->outputSeg->name == ".tdata")
+      return segment->outputSegmentOffset + offset;
     return segment->outputSeg->startVA + segment->outputSegmentOffset + offset;
+  }
   return offset;
 }
 
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index 499a265be739f..f4816aae70551 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -426,6 +426,15 @@ struct WasmSym {
   // linear memory.
   static GlobalSymbol *stackPointer;
 
+  // __tls_base
+  // Global that holds the address of the base of the current thread's
+  // TLS block.
+  static GlobalSymbol *tlsBase;
+
+  // __tls_size
+  // Symbol whose value is the size of the TLS block.
+  static GlobalSymbol *tlsSize;
+
   // __data_end
   // Symbol marking the end of the data and bss.
   static DefinedData *dataEnd;
@@ -448,6 +457,10 @@ struct WasmSym {
   // Function that applies relocations to data segment post-instantiation.
   static DefinedFunction *applyRelocs;
 
+  // __wasm_init_tls
+  // Function that points __tls_base at its argument and initializes TLS there.
+  static DefinedFunction *initTLS;
+
   // __dso_handle
   // Symbol used in calls to __cxa_atexit to determine current DLL
   static DefinedData *dsoHandle;
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 77a29a2d99ef4..23a63edee7cca 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -57,6 +57,7 @@ class Writer {
   void createInitMemoryFunction();
   void createApplyRelocationsFunction();
   void createCallCtorsFunction();
+  void createInitTLSFunction();
 
   void assignIndexes();
   void populateSymtab();
@@ -242,6 +243,11 @@ void Writer::layoutMemory() {
     log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", seg->name,
                 memoryPtr, seg->size, seg->alignment));
     memoryPtr += seg->size;
+
+    if (WasmSym::tlsSize && seg->name == ".tdata") {
+      auto *tlsSize = cast<DefinedGlobal>(WasmSym::tlsSize);
+      tlsSize->global->global.InitExpr.Value.Int32 = seg->size;
+    }
   }
 
   // TODO: Add .bss space here.
@@ -353,6 +359,7 @@ void Writer::populateTargetFeatures() {
   StringMap<std::string> used;
   StringMap<std::string> required;
   StringMap<std::string> disallowed;
+  bool tlsUsed = false;
 
   // Only infer used features if user did not specify features
   bool inferFeatures = !config->features.hasValue();
@@ -385,6 +392,14 @@ void Writer::populateTargetFeatures() {
               std::to_string(feature.Prefix));
       }
     }
+
+    for (InputSegment *segment : file->segments) {
+      if (!segment->live)
+        continue;
+      StringRef name = segment->getName();
+      if (name.startswith(".tdata") || name.startswith(".tbss"))
+        tlsUsed = true;
+    }
   }
 
   if (inferFeatures)
@@ -411,6 +426,10 @@ void Writer::populateTargetFeatures() {
     error("'bulk-memory' feature must be used in order to emit passive "
           "segments");
 
+  if (!used.count("bulk-memory") && tlsUsed)
+    error("'bulk-memory' feature must be used in order to use thread-local "
+          "storage");
+
   // Validate that used features are allowed in output
   if (!inferFeatures) {
     for (auto &feature : used.keys()) {
@@ -492,8 +511,8 @@ void Writer::calculateExports() {
       // implement in all major browsers.
       // See: https://github.com/WebAssembly/mutable-global
       if (g->getGlobalType()->Mutable) {
-        // Only the __stack_pointer should ever be create as mutable.
-        assert(g == WasmSym::stackPointer);
+        // Only __stack_pointer and __tls_base should ever be created mutable.
+        assert(g == WasmSym::stackPointer || g == WasmSym::tlsBase);
         continue;
       }
       export_ = {name, WASM_EXTERNAL_GLOBAL, g->getGlobalIndex()};
@@ -602,6 +621,11 @@ static StringRef getOutputDataSegmentName(StringRef name) {
   // we only have a single __memory_base to use as our base address.
   if (config->isPic)
     return ".data";
+  // We only support one thread-local segment, so we must merge the segments
+  // despite --no-merge-data-segments.
+  // We also need to merge .tbss into .tdata so they share the same offsets.
+  if (name.startswith(".tdata") || name.startswith(".tbss"))
+    return ".tdata";
   if (!config->mergeDataSegments)
     return name;
   if (name.startswith(".text."))
@@ -625,7 +649,7 @@ void Writer::createOutputSegments() {
       if (s == nullptr) {
         LLVM_DEBUG(dbgs() << "new segment: " << name << "\n");
         s = make<OutputSegment>(name, segments.size());
-        if (config->passiveSegments)
+        if (config->passiveSegments || name == ".tdata")
           s->initFlags = WASM_SEGMENT_IS_PASSIVE;
         segments.push_back(s);
       }
@@ -655,7 +679,7 @@ void Writer::createInitMemoryFunction() {
 
     // initialize passive data segments
     for (const OutputSegment *s : segments) {
-      if (s->initFlags & WASM_SEGMENT_IS_PASSIVE) {
+      if (s->initFlags & WASM_SEGMENT_IS_PASSIVE && s->name != ".tdata") {
         // destination address
         writeU8(os, WASM_OPCODE_I32_CONST, "i32.const");
         writeSleb128(os, s->startVA, "destination address");
@@ -737,6 +761,49 @@ void Writer::createCallCtorsFunction() {
   createFunction(WasmSym::callCtors, bodyContent);
 }
 
+void Writer::createInitTLSFunction() {
+  // Emit the body of __wasm_init_tls: store its i32 argument in __tls_base and
+  // memory.init the .tdata segment at that address.
+  if (!WasmSym::initTLS->isLive())
+    return;
+
+  std::string bodyContent;
+  {
+    raw_string_ostream os(bodyContent);
+    // .tdata need not be the first output segment, so search all of them.
+    OutputSegment *tlsSeg = nullptr;
+    for (auto *seg : segments) {
+      if (seg->name == ".tdata") {
+        tlsSeg = seg;
+        break;
+      }
+    }
+
+    writeUleb128(os, 0, "num locals");
+    if (tlsSeg) {
+      // __tls_base = argument 0
+      writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
+      writeUleb128(os, 0, "local index");
+      writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set");
+      writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "global index");
+      // memory.init operands: destination (argument 0), segment offset, size.
+      writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
+      writeUleb128(os, 0, "local index");
+      writeU8(os, WASM_OPCODE_I32_CONST, "i32.const");
+      writeSleb128(os, 0, "segment offset");
+      writeU8(os, WASM_OPCODE_I32_CONST, "i32.const");
+      writeSleb128(os, tlsSeg->size, "memory region size");
+      writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
+      writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "MEMORY.INIT");
+      writeUleb128(os, tlsSeg->index, "segment index immediate");
+      writeU8(os, 0, "memory index immediate");
+    }
+    writeU8(os, WASM_OPCODE_END, "end function");
+  }
+
+  createFunction(WasmSym::initTLS, bodyContent);
+}
+
 // Populate InitFunctions vector with init functions from all input objects.
 // This is then used either when creating the output linking section or to
 // synthesize the "__wasm_call_ctors" function.
@@ -829,6 +896,12 @@ void Writer::run() {
     createCallCtorsFunction();
   }
 
+  if (config->sharedMemory && !config->shared)
+    createInitTLSFunction();
+
+  if (errorCount())
+    return;
+
   log("-- calculateTypes");
   calculateTypes();
   log("-- calculateExports");
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index 4f6c24bbc68df..0f22bfe610c6c 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -242,7 +242,9 @@ enum : unsigned {
 enum : unsigned {
   WASM_OPCODE_END = 0x0b,
   WASM_OPCODE_CALL = 0x10,
+  WASM_OPCODE_LOCAL_GET = 0x20,
   WASM_OPCODE_GLOBAL_GET = 0x23,
+  WASM_OPCODE_GLOBAL_SET = 0x24,
   WASM_OPCODE_I32_STORE = 0x36,
   WASM_OPCODE_I32_CONST = 0x41,
   WASM_OPCODE_I64_CONST = 0x42,
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 1731995b28734..1b892727547dc 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -124,4 +124,13 @@ def int_wasm_data_drop :
             [llvm_i32_ty],
             [IntrNoDuplicate, IntrHasSideEffects, ImmArg<0>]>;
 
+//===----------------------------------------------------------------------===//
+// Thread-local storage intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_wasm_tls_size :
+  Intrinsic<[llvm_anyint_ty],
+            [],
+            [IntrNoMem, IntrSpeculatable]>;
+
 } // TargetPrefix = "wasm"
diff --git a/llvm/include/llvm/MC/MCSectionWasm.h b/llvm/include/llvm/MC/MCSectionWasm.h
index 1adc812649232..2941a40f3b8c6 100644
--- a/llvm/include/llvm/MC/MCSectionWasm.h
+++ b/llvm/include/llvm/MC/MCSectionWasm.h
@@ -66,7 +66,8 @@ class MCSectionWasm final : public MCSection {
   bool isVirtualSection() const override;
 
   bool isWasmData() const {
-    return Kind.isGlobalWriteableData() || Kind.isReadOnly();
+    return Kind.isGlobalWriteableData() || Kind.isReadOnly() ||
+           Kind.isThreadLocal();
   }
 
   bool isUnique() const { return UniqueID != ~0U; }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 312b203859d51..2552e91508334 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -233,6 +233,8 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
       return false;
     if (Addr.getGlobalValue())
       return false;
+    if (GV->isThreadLocal())
+      return false;
     Addr.setGlobalValue(GV);
     return true;
   }
@@ -614,6 +616,8 @@ unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) {
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
     if (TLI.isPositionIndependent())
       return 0;
+    if (GV->isThreadLocal())
+      return 0;
     unsigned ResultReg =
         createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass
                                                : &WebAssembly::I32RegClass);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index bd699d92f76c7..1efbb3b067b85 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
 #include "WebAssembly.h"
 #include "WebAssemblyTargetMachine.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h" // To access function attributes.
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
@@ -171,6 +172,54 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
     }
   }
 
+  case ISD::GlobalTLSAddress: {
+    const auto *GA = cast<GlobalAddressSDNode>(Node);
+
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
+      report_fatal_error("cannot use thread-local storage without bulk memory",
+                         false);
+
+    if (GA->getGlobal()->getThreadLocalMode() !=
+        GlobalValue::LocalExecTLSModel) {
+      report_fatal_error("only -ftls-model=local-exec is supported for now",
+                         false);
+    }
+
+    MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+    assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+    SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT);
+    SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress(
+        GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0);
+
+    MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32,
+                                                    DL, MVT::i32, TLSBaseSym);
+    MachineSDNode *TLSOffset = CurDAG->getMachineNode(
+        WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym);
+    MachineSDNode *TLSAddress =
+        CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32,
+                               SDValue(TLSBase, 0), SDValue(TLSOffset, 0));
+    ReplaceNode(Node, TLSAddress);
+    return;
+  }
+
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+    switch (IntNo) {
+    case Intrinsic::wasm_tls_size: {
+      MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout());
+      assert(PtrVT == MVT::i32 && "only wasm32 is supported for now");
+
+      MachineSDNode *TLSSize = CurDAG->getMachineNode(
+          WebAssembly::GLOBAL_GET_I32, DL, PtrVT,
+          CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32));
+      ReplaceNode(Node, TLSSize);
+      return;
+    }
+    }
+    break;
+  }
+
   default:
     break;
   }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 611f05f949691..288b991ae2c54 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -77,9 +77,11 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
   // functions. It's OK to hardcode knowledge of specific symbols here; this
   // method is precisely there for fetching the signatures of known
   // Clang-provided symbols.
-  if (strcmp(Name, "__stack_pointer") == 0 ||
-      strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0) {
-    bool Mutable = strcmp(Name, "__stack_pointer") == 0;
+  if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 ||
+      strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 ||
+      strcmp(Name, "__tls_size") == 0) {
+    bool Mutable =
+        strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0;
     WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
     WasmSym->setGlobalType(wasm::WasmGlobalType{
         uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index a75df34979bd9..7e65368e671a5 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -186,13 +186,21 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass {
     for (auto &F : M)
       replaceFeatures(F, FeatureStr);
 
-    bool Stripped = false;
-    if (!Features[WebAssembly::FeatureAtomics]) {
-      Stripped |= stripAtomics(M);
-      Stripped |= stripThreadLocals(M);
-    }
+    bool StrippedAtomics = false;
+    bool StrippedTLS = false;
+
+    if (!Features[WebAssembly::FeatureAtomics])
+      StrippedAtomics = stripAtomics(M);
+
+    if (!Features[WebAssembly::FeatureBulkMemory])
+      StrippedTLS = stripThreadLocals(M);
+
+    if (StrippedAtomics && !StrippedTLS)
+      stripThreadLocals(M);
+    else if (StrippedTLS && !StrippedAtomics)
+      stripAtomics(M);
 
-    recordFeatures(M, Features, Stripped);
+    recordFeatures(M, Features, StrippedAtomics || StrippedTLS);
 
     // Conservatively assume we have made some change
     return true;
@@ -271,7 +279,8 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass {
         // "atomics" is special: code compiled without atomics may have had its
         // atomics lowered to nonatomic operations. In that case, atomics is
         // disallowed to prevent unsafe linking with atomics-enabled objects.
-        assert(!Features[WebAssembly::FeatureAtomics]);
+        assert(!Features[WebAssembly::FeatureAtomics] ||
+               !Features[WebAssembly::FeatureBulkMemory]);
         M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey,
                         wasm::WASM_FEATURE_PREFIX_DISALLOWED);
       } else if (Features[KV.Value]) {
diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
index a5c08f850e228..c25b9e59b1b22 100644
--- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
+++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS
-; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS
+; RUN: llc < %s -mattr=-bulk-memory | FileCheck %s --check-prefixes NO-BULK-MEM
+; RUN: llc < %s -mattr=+bulk-memory | FileCheck %s --check-prefixes BULK-MEM
 
 ; Test that the target features section contains -atomics or +atomics
 ; for modules that have thread local storage in their source.
@@ -9,18 +9,18 @@ target triple = "wasm32-unknown-unknown"
 
 @foo = internal thread_local global i32 0
 
-; CHECK-LABEL: .custom_section.target_features,"",@
+; -bulk-memory
+; NO-BULK-MEM-LABEL: .custom_section.target_features,"",@
+; NO-BULK-MEM-NEXT: .int8 1
+; NO-BULK-MEM-NEXT: .int8 45
+; NO-BULK-MEM-NEXT: .int8 7
+; NO-BULK-MEM-NEXT: .ascii "atomics"
+; NO-BULK-MEM-NEXT: .bss.foo,"",@
 
-; -atomics
-; NO-ATOMICS-NEXT: .int8 1
-; NO-ATOMICS-NEXT: .int8 45
-; NO-ATOMICS-NEXT: .int8 7
-; NO-ATOMICS-NEXT: .ascii "atomics"
-; NO-ATOMICS-NEXT: .bss.foo,"",@
-
-; +atomics
-; ATOMICS-NEXT: .int8 1
-; ATOMICS-NEXT: .int8 43
-; ATOMICS-NEXT: .int8 7
-; ATOMICS-NEXT: .ascii "atomics"
-; ATOMICS-NEXT: .tbss.foo,"",@
+; +bulk-memory
+; BULK-MEM-LABEL: .custom_section.target_features,"",@
+; BULK-MEM-NEXT: .int8 1
+; BULK-MEM-NEXT: .int8 43
+; BULK-MEM-NEXT: .int8 11
+; BULK-MEM-NEXT: .ascii "bulk-memory"
+; BULK-MEM-NEXT: .tbss.foo,"",@
diff --git a/llvm/test/CodeGen/WebAssembly/tls.ll b/llvm/test/CodeGen/WebAssembly/tls.ll
index 21e84f9fa9799..02979a28af99b 100644
--- a/llvm/test/CodeGen/WebAssembly/tls.ll
+++ b/llvm/test/CodeGen/WebAssembly/tls.ll
@@ -1,17 +1,82 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory | FileCheck %s --check-prefixes=CHECK,TLS
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory | FileCheck %s --check-prefixes=CHECK,NO-TLS
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; SINGLE-LABEL: address_of_tls:
+; CHECK-LABEL: address_of_tls:
+; CHECK-NEXT: .functype  address_of_tls () -> (i32)
 define i32 @address_of_tls() {
-  ; SINGLE: i32.const $push0=, tls
-  ; SINGLE-NEXT: return $pop0
+  ; TLS-DAG: global.get __tls_base
+  ; TLS-DAG: i32.const tls
+  ; TLS-NEXT: i32.add
+  ; TLS-NEXT: return
+
+  ; NO-TLS-NEXT: i32.const tls
+  ; NO-TLS-NEXT: return
   ret i32 ptrtoint(i32* @tls to i32)
 }
 
-; SINGLE: .type	tls,@object
-; SINGLE-NEXT: .section	.bss.tls,"",@
-; SINGLE-NEXT: .p2align 2
-; SINGLE-NEXT: tls:
-; SINGLE-NEXT: .int32 0
-@tls = internal thread_local global i32 0
+; CHECK-LABEL: ptr_to_tls:
+; CHECK-NEXT: .functype ptr_to_tls () -> (i32)
+define i32* @ptr_to_tls() {
+  ; TLS-DAG: global.get __tls_base
+  ; TLS-DAG: i32.const tls
+  ; TLS-NEXT: i32.add
+  ; TLS-NEXT: return
+
+  ; NO-TLS-NEXT: i32.const tls
+  ; NO-TLS-NEXT: return
+  ret i32* @tls
+}
+
+; CHECK-LABEL: tls_load:
+; CHECK-NEXT: .functype tls_load () -> (i32)
+define i32 @tls_load() {
+  ; TLS-DAG: global.get __tls_base
+  ; TLS-DAG: i32.const tls
+  ; TLS-NEXT: i32.add
+  ; TLS-NEXT: i32.load 0
+  ; TLS-NEXT: return
+
+  ; NO-TLS-NEXT: i32.const 0
+  ; NO-TLS-NEXT: i32.load tls
+  ; NO-TLS-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; CHECK-LABEL: tls_store:
+; CHECK-NEXT: .functype tls_store (i32) -> ()
+define void @tls_store(i32 %x) {
+  ; TLS-DAG: global.get __tls_base
+  ; TLS-DAG: i32.const tls
+  ; TLS-NEXT: i32.add
+  ; TLS-NEXT: i32.store 0
+  ; TLS-NEXT: return
+
+  ; NO-TLS-NEXT: i32.const 0
+  ; NO-TLS-NEXT: i32.store tls
+  ; NO-TLS-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+define i32 @tls_size() {
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; TLS-NEXT: .section .tbss.tls,"",@
+; NO-TLS-NEXT: .section .bss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
+@tls = internal thread_local(localexec) global i32 0
+
+declare i32 @llvm.wasm.tls.size.i32()

From fa575839225a0b04ed0ba923e8eacdf6d369d7fe Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 16 Jul 2019 22:01:30 +0000
Subject: [PATCH 298/451] Add REQUIRES: x86 to safeseh-no.s test for x86

llvm-svn: 366273
---
 lld/test/COFF/safeseh-no.s | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lld/test/COFF/safeseh-no.s b/lld/test/COFF/safeseh-no.s
index 2a301a3ba9b83..70d6f56e88803 100644
--- a/lld/test/COFF/safeseh-no.s
+++ b/lld/test/COFF/safeseh-no.s
@@ -1,3 +1,4 @@
+# REQUIRES: x86
 # RUN: llvm-mc -triple i686-windows-msvc %s -filetype=obj -o %t.obj
 # RUN: not lld-link %t.obj -safeseh -out:%t.exe -entry:main 2>&1 | FileCheck %s --check-prefix=ERROR
 # safe seh should be on by default.

From fe66fdb8f3076a5146c274bba1258b402cf4e726 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail@justinbogner.com>
Date: Tue, 16 Jul 2019 22:10:16 +0000
Subject: [PATCH 299/451] [TableGen] Add "getOperandType" to get operand types
 from opcode/opidx

The InstrInfoEmitter outputs an enum called "OperandType" which gives
numerical IDs to each operand type. This patch makes use of this enum
to define a function called "getOperandType", which allows looking up
the type of an operand given its opcode and operand index.

Patch by Nicolas Guillemot. Thanks!

Differential Revision: https://reviews.llvm.org/D63320

llvm-svn: 366274
---
 llvm/test/TableGen/get-operand-type.td   | 40 ++++++++++++++++
 llvm/utils/TableGen/InstrInfoEmitter.cpp | 61 ++++++++++++++++++++++--
 2 files changed, 97 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/TableGen/get-operand-type.td

diff --git a/llvm/test/TableGen/get-operand-type.td b/llvm/test/TableGen/get-operand-type.td
new file mode 100644
index 0000000000000..5be2c777c8ae9
--- /dev/null
+++ b/llvm/test/TableGen/get-operand-type.td
@@ -0,0 +1,40 @@
+// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s | FileCheck %s
+
+// Check that getOperandType has the expected info in it
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+  let InstructionSet = archInstrInfo;
+}
+
+def Reg : Register<"reg">;
+def RegClass : RegisterClass<"foo", [i32], 0, (add Reg)>;
+
+def OpA : Operand<i32>;
+def OpB : Operand<i32>;
+
+def InstA : Instruction {
+  let Size = 1;
+  let OutOperandList = (outs OpA:$a);
+  let InOperandList = (ins OpB:$b, i32imm:$c);
+  field bits<8> Inst;
+  field bits<8> SoftFail = 0;
+  let Namespace = "MyNamespace";
+}
+
+def InstB : Instruction {
+  let Size = 1;
+  let OutOperandList = (outs i32imm:$d);
+  let InOperandList = (ins unknown:$x);
+  field bits<8> Inst;
+  field bits<8> SoftFail = 0;
+  let Namespace = "MyNamespace";
+}
+
+// CHECK: #ifdef GET_INSTRINFO_OPERAND_TYPE
+// CHECK: { OpTypes::OpA, OpTypes::OpB, OpTypes::i32imm, }
+// CHECK-NEXT: { OpTypes::i32imm, -1, }
+// CHECK: #endif //GET_INSTRINFO_OPERAND_TYPE
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index a4d66bb871cc6..d92585685e145 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -76,7 +76,9 @@ class InstrInfoEmitter {
                   std::map<std::vector<Record*>, unsigned> &EL,
                   const OperandInfoMapTy &OpInfo,
                   raw_ostream &OS);
-  void emitOperandTypesEnum(raw_ostream &OS, const CodeGenTarget &Target);
+  void emitOperandTypeMappings(
+      raw_ostream &OS, const CodeGenTarget &Target,
+      ArrayRef<const CodeGenInstruction *> NumberedInstructions);
   void initOperandMapData(
             ArrayRef<const CodeGenInstruction *> NumberedInstructions,
             StringRef Namespace,
@@ -324,8 +326,9 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
 /// Generate an enum for all the operand types for this target, under the
 /// llvm::TargetNamespace::OpTypes namespace.
 /// Operand types are all definitions derived of the Operand Target.td class.
-void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
-                                            const CodeGenTarget &Target) {
+void InstrInfoEmitter::emitOperandTypeMappings(
+    raw_ostream &OS, const CodeGenTarget &Target,
+    ArrayRef<const CodeGenInstruction *> NumberedInstructions) {
 
   StringRef Namespace = Target.getInstNamespace();
   std::vector<Record *> Operands = Records.getAllDerivedDefinitions("Operand");
@@ -349,6 +352,56 @@ void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS,
   OS << "} // end namespace " << Namespace << "\n";
   OS << "} // end namespace llvm\n";
   OS << "#endif // GET_INSTRINFO_OPERAND_TYPES_ENUM\n\n";
+
+  OS << "#ifdef GET_INSTRINFO_OPERAND_TYPE\n";
+  OS << "#undef GET_INSTRINFO_OPERAND_TYPE\n";
+  OS << "namespace llvm {\n";
+  OS << "namespace " << Namespace << " {\n";
+  OS << "LLVM_READONLY\n";
+  OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n";
+  if (!NumberedInstructions.empty()) {
+    OS << "  static const std::initializer_list<int> OpcodeOperandTypes[] = "
+          "{\n";
+    for (const CodeGenInstruction *Inst : NumberedInstructions) {
+      OS << "    { ";
+      for (const auto &Op : Inst->Operands) {
+        // Handle aggregate operands and normal operands the same way by
+        // expanding either case into a list of operands for this op.
+        std::vector<CGIOperandList::OperandInfo> OperandList;
+
+        const DagInit *MIOI = Op.MIOperandInfo;
+        if (!MIOI || MIOI->getNumArgs() == 0) {
+          // Single, anonymous, operand.
+          OperandList.push_back(Op);
+        } else {
+          for (unsigned j = 0, e = Op.MINumOperands; j != e; ++j) {
+            OperandList.push_back(Op);
+
+            auto *OpR = cast<DefInit>(MIOI->getArg(j))->getDef();
+            OperandList.back().Rec = OpR;
+          }
+        }
+
+        for (unsigned j = 0, e = OperandList.size(); j != e; ++j) {
+          Record *OpR = OperandList[j].Rec;
+          if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous())
+            OS << "OpTypes::" << OpR->getName();
+          else
+            OS << -1;
+          OS << ", ";
+        }
+      }
+      OS << "},\n";
+    }
+    OS << "  };\n";
+    OS << "  return OpcodeOperandTypes[Opcode].begin()[OpIdx];\n";
+  } else {
+    OS << "  llvm_unreachable(\"No instructions defined\");\n";
+  }
+  OS << "}\n";
+  OS << "} // end namespace " << Namespace << "\n";
+  OS << "} // end namespace llvm\n";
+  OS << "#endif //GET_INSTRINFO_OPERAND_TYPE\n\n";
 }
 
 void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS,
@@ -560,7 +613,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
 
   emitOperandNameMappings(OS, Target, NumberedInstructions);
 
-  emitOperandTypesEnum(OS, Target);
+  emitOperandTypeMappings(OS, Target, NumberedInstructions);
 
   emitMCIIHelperMethods(OS, TargetName);
 }

From 0a8d4df7999eaa876ff7c7510c964f6ec127ffa8 Mon Sep 17 00:00:00 2001
From: Guanzhong Chen <gzchen@google.com>
Date: Tue, 16 Jul 2019 22:22:08 +0000
Subject: [PATCH 300/451] [WebAssembly] Compile all TLS on Emscripten as
 local-exec

Summary:
Currently, on Emscripten, dynamic linking is not supported with threads.
This means that if thread-local storage is used, it must be used in a
statically-linked executable. Hence, local-exec is the only possible model.

This diff compiles all TLS variables to use local-exec on Emscripten as a
temporary measure until dynamic linking is supported with threads.

The goal for this is to allow C++ types with constructors to be thread-local.

Currently, when `clang` compiles a `thread_local` variable with a constructor,
it generates a `__tls_guard` variable:

    @__tls_guard = internal thread_local global i8 0, align 1

As no TLS model is specified, this is treated as general-dynamic, which we do
not support (and cannot support without implementing dynamic linking support
with threads in Emscripten). As a result, any C++ constructor in `thread_local`
variables would not compile.

By compiling all `thread_local` as local-exec, `__tls_guard` will compile and
we can support C++ constructors with TLS without implementing dynamic linking
with threads.

Depends on D64537

Reviewers: tlively, aheejin, sbc100

Reviewed By: aheejin

Subscribers: dschuff, jgravelle-google, hiraditya, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64776

llvm-svn: 366275
---
 .../WebAssembly/WebAssemblyISelDAGToDAG.cpp   | 12 ++-
 .../WebAssembly/tls-general-dynamic.ll        | 86 +++++++++++++++++++
 .../WebAssembly/{tls.ll => tls-local-exec.ll} |  0
 3 files changed, 96 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll
 rename llvm/test/CodeGen/WebAssembly/{tls.ll => tls-local-exec.ll} (100%)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 1efbb3b067b85..26339eaef37db 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -179,9 +179,17 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
       report_fatal_error("cannot use thread-local storage without bulk memory",
                          false);
 
+    // Currently Emscripten does not support dynamic linking with threads.
+    // Therefore, if we have thread-local storage, only the local-exec model
+    // is possible.
+    // TODO: remove this and implement proper TLS models once Emscripten
+    // supports dynamic linking with threads.
     if (GA->getGlobal()->getThreadLocalMode() !=
-        GlobalValue::LocalExecTLSModel) {
-      report_fatal_error("only -ftls-model=local-exec is supported for now",
+            GlobalValue::LocalExecTLSModel &&
+        !Subtarget->getTargetTriple().isOSEmscripten()) {
+      report_fatal_error("only -ftls-model=local-exec is supported for now on "
+                         "non-Emscripten OSes: variable " +
+                             GA->getGlobal()->getName(),
                          false);
     }
 
diff --git a/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll
new file mode 100644
index 0000000000000..3f6d9d325c68c
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll
@@ -0,0 +1,86 @@
+; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory 2>&1 | FileCheck %s --check-prefix=ERROR
+; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory -fast-isel 2>&1 | FileCheck %s --check-prefix=ERROR
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory --mtriple wasm32-unknown-emscripten | FileCheck %s --check-prefixes=CHECK,TLS
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory --mtriple wasm32-unknown-emscripten -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory | FileCheck %s --check-prefixes=CHECK,NO-TLS
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; ERROR: LLVM ERROR: only -ftls-model=local-exec is supported for now on non-Emscripten OSes: variable tls
+
+; CHECK-LABEL: address_of_tls:
+; CHECK-NEXT: .functype  address_of_tls () -> (i32)
+define i32 @address_of_tls() {
+  ; TLS-DAG: global.get __tls_base
+  ; TLS-DAG: i32.const tls
+  ; TLS-NEXT: i32.add
+  ; TLS-NEXT: return
+
+  ; NO-TLS-NEXT: i32.const tls
+  ; NO-TLS-NEXT: return
+  ret i32 ptrtoint(i32* @tls to i32)
+}
+
+; CHECK-LABEL: ptr_to_tls:
+; CHECK-NEXT: .functype ptr_to_tls () -> (i32)
+define i32* @ptr_to_tls() {
+  ; TLS-DAG: global.get __tls_base
+  ; TLS-DAG: i32.const tls
+  ; TLS-NEXT: i32.add
+  ; TLS-NEXT: return
+
+  ; NO-TLS-NEXT: i32.const tls
+  ; NO-TLS-NEXT: return
+  ret i32* @tls
+}
+
+; CHECK-LABEL: tls_load:
+; CHECK-NEXT: .functype tls_load () -> (i32)
+define i32 @tls_load() {
+  ; TLS-DAG: global.get __tls_base
+  ; TLS-DAG: i32.const tls
+  ; TLS-NEXT: i32.add
+  ; TLS-NEXT: i32.load 0
+  ; TLS-NEXT: return
+
+  ; NO-TLS-NEXT: i32.const 0
+  ; NO-TLS-NEXT: i32.load tls
+  ; NO-TLS-NEXT: return
+  %tmp = load i32, i32* @tls, align 4
+  ret i32 %tmp
+}
+
+; CHECK-LABEL: tls_store:
+; CHECK-NEXT: .functype tls_store (i32) -> ()
+define void @tls_store(i32 %x) {
+  ; TLS-DAG: global.get __tls_base
+  ; TLS-DAG: i32.const tls
+  ; TLS-NEXT: i32.add
+  ; TLS-NEXT: i32.store 0
+  ; TLS-NEXT: return
+
+  ; NO-TLS-NEXT: i32.const 0
+  ; NO-TLS-NEXT: i32.store tls
+  ; NO-TLS-NEXT: return
+  store i32 %x, i32* @tls, align 4
+  ret void
+}
+
+; CHECK-LABEL: tls_size:
+; CHECK-NEXT: .functype tls_size () -> (i32)
+define i32 @tls_size() {
+; CHECK-NEXT: global.get __tls_size
+; CHECK-NEXT: return
+  %1 = call i32 @llvm.wasm.tls.size.i32()
+  ret i32 %1
+}
+
+; CHECK: .type tls,@object
+; TLS-NEXT: .section .tbss.tls,"",@
+; NO-TLS-NEXT: .section .bss.tls,"",@
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: tls:
+; CHECK-NEXT: .int32 0
+@tls = internal thread_local global i32 0
+
+declare i32 @llvm.wasm.tls.size.i32()
diff --git a/llvm/test/CodeGen/WebAssembly/tls.ll b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll
similarity index 100%
rename from llvm/test/CodeGen/WebAssembly/tls.ll
rename to llvm/test/CodeGen/WebAssembly/tls-local-exec.ll

From fdeed837edf354558cf0c6b0a0dd3af2124906b8 Mon Sep 17 00:00:00 2001
From: George Burgess IV <george.burgess.iv@gmail.com>
Date: Tue, 16 Jul 2019 22:32:17 +0000
Subject: [PATCH 301/451] Fix a typo in target features

There was a slight typo in r364352 that ended up causing our backend to
complain on some x86 Android builds. This CL fixes that.

Differential Revision: https://reviews.llvm.org/D64781

llvm-svn: 366276
---
 clang/lib/Driver/ToolChains/Arch/X86.cpp | 2 +-
 clang/test/Driver/clang-translation.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index 2e75039bf0d65..34be226b69e98 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -135,7 +135,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
     if (ArchType == llvm::Triple::x86_64) {
       Features.push_back("+sse4.2");
       Features.push_back("+popcnt");
-      Features.push_back("+mcx16");
+      Features.push_back("+cx16");
     } else
       Features.push_back("+ssse3");
   }
diff --git a/clang/test/Driver/clang-translation.c b/clang/test/Driver/clang-translation.c
index 0054535115aad..766e779382692 100644
--- a/clang/test/Driver/clang-translation.c
+++ b/clang/test/Driver/clang-translation.c
@@ -318,7 +318,7 @@
 // ANDROID-X86_64: "-target-cpu" "x86-64"
 // ANDROID-X86_64: "-target-feature" "+sse4.2"
 // ANDROID-X86_64: "-target-feature" "+popcnt"
-// ANDROID-X86_64: "-target-feature" "+mcx16"
+// ANDROID-X86_64: "-target-feature" "+cx16"
 
 // RUN: %clang -target mips-linux-gnu -### -S %s 2>&1 | \
 // RUN: FileCheck -check-prefix=MIPS %s

From 418516c7b8658994622273a2a44a9fba3280dcfc Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail@justinbogner.com>
Date: Tue, 16 Jul 2019 22:39:18 +0000
Subject: [PATCH 302/451] [TableGen] Generate offsets into a flat array for
 getOperandType

Rather than an array of std::initializer_list, generate a table of
offsets and a flat array of the operands for getOperandType. This is a
bit more efficient on platforms that don't manage to get the array of
initializer_lists initialized at link time (I'm looking at you,
macOS). It's also quite a bit faster to compile.

llvm-svn: 366278
---
 llvm/test/TableGen/get-operand-type.td   |  4 +-
 llvm/utils/TableGen/InstrInfoEmitter.cpp | 63 ++++++++++++++----------
 2 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/llvm/test/TableGen/get-operand-type.td b/llvm/test/TableGen/get-operand-type.td
index 5be2c777c8ae9..69bcde38c7ef2 100644
--- a/llvm/test/TableGen/get-operand-type.td
+++ b/llvm/test/TableGen/get-operand-type.td
@@ -35,6 +35,6 @@ def InstB : Instruction {
 }
 
 // CHECK: #ifdef GET_INSTRINFO_OPERAND_TYPE
-// CHECK: { OpTypes::OpA, OpTypes::OpB, OpTypes::i32imm, }
-// CHECK-NEXT: { OpTypes::i32imm, -1, }
+// CHECK:        OpTypes::OpA, OpTypes::OpB, OpTypes::i32imm,
+// CHECK-NEXT:   OpTypes::i32imm, -1,
 // CHECK: #endif //GET_INSTRINFO_OPERAND_TYPE
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index d92585685e145..2d367f538b71f 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -213,7 +213,7 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
 }
 
 /// Initialize data structures for generating operand name mappings.
-/// 
+///
 /// \param Operands [out] A map used to generate the OpName enum with operand
 ///        names as its keys and operand enum values as its values.
 /// \param OperandMap [out] A map for representing the operand name mappings for
@@ -360,41 +360,54 @@ void InstrInfoEmitter::emitOperandTypeMappings(
   OS << "LLVM_READONLY\n";
   OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n";
   if (!NumberedInstructions.empty()) {
-    OS << "  static const std::initializer_list<int> OpcodeOperandTypes[] = "
-          "{\n";
+    std::vector<int> OperandOffsets;
+    std::vector<Record *> OperandRecords;
+    int CurrentOffset = 0;
     for (const CodeGenInstruction *Inst : NumberedInstructions) {
-      OS << "    { ";
+      OperandOffsets.push_back(CurrentOffset);
       for (const auto &Op : Inst->Operands) {
-        // Handle aggregate operands and normal operands the same way by
-        // expanding either case into a list of operands for this op.
-        std::vector<CGIOperandList::OperandInfo> OperandList;
-
         const DagInit *MIOI = Op.MIOperandInfo;
         if (!MIOI || MIOI->getNumArgs() == 0) {
           // Single, anonymous, operand.
-          OperandList.push_back(Op);
+          OperandRecords.push_back(Op.Rec);
+          ++CurrentOffset;
         } else {
-          for (unsigned j = 0, e = Op.MINumOperands; j != e; ++j) {
-            OperandList.push_back(Op);
-
-            auto *OpR = cast<DefInit>(MIOI->getArg(j))->getDef();
-            OperandList.back().Rec = OpR;
+          for (Init *Arg : make_range(MIOI->arg_begin(), MIOI->arg_end())) {
+            OperandRecords.push_back(cast<DefInit>(Arg)->getDef());
+            ++CurrentOffset;
           }
         }
-
-        for (unsigned j = 0, e = OperandList.size(); j != e; ++j) {
-          Record *OpR = OperandList[j].Rec;
-          if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous())
-            OS << "OpTypes::" << OpR->getName();
-          else
-            OS << -1;
-          OS << ", ";
-        }
       }
-      OS << "},\n";
     }
+
+    // Emit the table of offsets for the opcode lookup.
+    OS << "  const int Offsets[] = {\n";
+    for (int I = 0, E = OperandOffsets.size(); I != E; ++I)
+      OS << "    " << OperandOffsets[I] << ",\n";
     OS << "  };\n";
-    OS << "  return OpcodeOperandTypes[Opcode].begin()[OpIdx];\n";
+
+    // Add an entry for the end so that we don't need to special case it below.
+    OperandOffsets.push_back(OperandRecords.size());
+    // Emit the actual operand types in a flat table.
+    OS << "  const int OpcodeOperandTypes[] = {\n    ";
+    for (int I = 0, E = OperandRecords.size(), CurOffset = 1; I != E; ++I) {
+      // We print each Opcode's operands in its own row.
+      if (I == OperandOffsets[CurOffset]) {
+        OS << "\n    ";
+        // If there are empty rows, mark them with an empty comment.
+        while (OperandOffsets[++CurOffset] == I)
+          OS << "/**/\n    ";
+      }
+      Record *OpR = OperandRecords[I];
+      if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous())
+        OS << "OpTypes::" << OpR->getName();
+      else
+        OS << -1;
+      OS << ", ";
+    }
+    OS << "\n  };\n";
+
+    OS << "  return OpcodeOperandTypes[Offsets[Opcode] + OpIdx];\n";
   } else {
     OS << "  llvm_unreachable(\"No instructions defined\");\n";
   }

From 1c3f4ec7fc1d0c58ec4024a064c055462448e50f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 22:41:34 +0000
Subject: [PATCH 303/451] GlobalISel: Add overload of handleAssignments with
 CCState

AMDGPU needs to allocate special argument registers separately from
the user function argument list, so needs direct control over the
CCState.

The ArgLocs argument is only really necessary because CCState doesn't
allow access to it.

llvm-svn: 366279
---
 llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h |  6 +++++-
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp        | 13 +++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index d8d15bd0713ad..d717121ad78ec 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -27,6 +27,7 @@
 
 namespace llvm {
 
+class CCState;
 class DataLayout;
 class Function;
 class MachineIRBuilder;
@@ -163,7 +164,10 @@ class CallLowering {
   /// \return True if everything has succeeded, false otherwise.
   bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
                          ValueHandler &Handler) const;
-
+  bool handleAssignments(CCState &CCState,
+                         SmallVectorImpl<CCValAssign> &ArgLocs,
+                         MachineIRBuilder &MIRBuilder, ArrayRef<ArgInfo> Args,
+                         ValueHandler &Handler) const;
 public:
   CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
   virtual ~CallLowering() = default;
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 342fb18d9d613..a5d8205a34a8e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -163,10 +163,19 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                      ValueHandler &Handler) const {
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
-  const DataLayout &DL = F.getParent()->getDataLayout();
-
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+  return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler);
+}
+
+bool CallLowering::handleAssignments(CCState &CCInfo,
+                                     SmallVectorImpl<CCValAssign> &ArgLocs,
+                                     MachineIRBuilder &MIRBuilder,
+                                     ArrayRef<ArgInfo> Args,
+                                     ValueHandler &Handler) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const Function &F = MF.getFunction();
+  const DataLayout &DL = F.getParent()->getDataLayout();
 
   unsigned NumArgs = Args.size();
   for (unsigned i = 0; i != NumArgs; ++i) {

From 1bd9c6547f92e694d8d724efb757bc9e8b1f3607 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 16 Jul 2019 22:41:38 +0000
Subject: [PATCH 304/451] ARM: Fix missing immarg for space intrinsic

llvm-svn: 366280
---
 llvm/include/llvm/IR/IntrinsicsARM.td      | 2 +-
 llvm/test/Verifier/ARM/intrinsic-immarg.ll | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 886f1d7fd1bc6..4792af097d95d 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -19,7 +19,7 @@ let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
 // A space-consuming intrinsic primarily for testing ARMConstantIslands. The
 // first argument is the number of bytes this "instruction" takes up, the second
 // and return value are essentially chains, used to force ordering during ISel.
-def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
 
 // 16-bit multiplications
 def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">,
diff --git a/llvm/test/Verifier/ARM/intrinsic-immarg.ll b/llvm/test/Verifier/ARM/intrinsic-immarg.ll
index b578c6d76195c..d069dd682fdb5 100644
--- a/llvm/test/Verifier/ARM/intrinsic-immarg.ll
+++ b/llvm/test/Verifier/ARM/intrinsic-immarg.ll
@@ -100,3 +100,12 @@ define void @mcrr2(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) {
   call void @llvm.arm.mcrr2(i32 0, i32 1, i32 2, i32 3, i32 %arg4)
   ret void
 }
+
+declare i32 @llvm.arm.space(i32, i32) nounwind
+define i32 @space(i32 %arg0, i32 %arg1) {
+  ; CHECK: immarg operand has non-immediate parameter
+  ; CHECK-NEXT: i32 %arg0
+  ; CHECK-NEXT: call i32 @llvm.arm.space(i32 %arg0, i32 %arg1)
+  %space = call i32 @llvm.arm.space(i32 %arg0, i32 %arg1)
+  ret i32 %space
+}

From e71679082c0ed1598a924aa974376e8ce21c1cea Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Tue, 16 Jul 2019 23:01:59 +0000
Subject: [PATCH 305/451] add a workaround in GetLine to account for ReadFile
 not reporting error

Summary:
ReadFile on Windows is supposed to set ERROR_OPERATION_ABORTED according
to the docs on MSDN. However, this has evidently been a known bug since
Windows 8. Therefore, we can't detect whether a signal interrupted the
fgets. So pressing ctrl-c causes the repl to end and the process to
exit. A temporary workaround is just to attempt the fgets twice until
this bug is fixed.

A possible alternative would be to set a flag in the `sigint_handler`
and simply check that flag in the true part of the if statement.
However, signal handlers on Windows are asynchronous and this would
require sleeping on the repl loop thread while still not necessarily
guaranteeing that you caught the sigint.

Reviewers: jfb

Differential Revision: https://reviews.llvm.org/D64660

llvm-svn: 366281
---
 lldb/source/Core/IOHandler.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp
index 3a7a75e8ae56b..b30308490cca5 100644
--- a/lldb/source/Core/IOHandler.cpp
+++ b/lldb/source/Core/IOHandler.cpp
@@ -374,7 +374,18 @@ bool IOHandlerEditline::GetLine(std::string &line, bool &interrupted) {
       bool got_line = false;
       m_editing = true;
       while (!done) {
+#ifdef _WIN32
+        // ReadFile on Windows is supposed to set ERROR_OPERATION_ABORTED
+        // according to the docs on MSDN. However, this has evidently been a
+        // known bug since Windows 8. Therefore, we can't detect if a signal
+        // interrupted in the fgets. So pressing ctrl-c causes the repl to end
+        // and the process to exit. A temporary workaround is just to attempt to
+        // fgets twice until this bug is fixed.
+        if (fgets(buffer, sizeof(buffer), in) == nullptr &&
+            fgets(buffer, sizeof(buffer), in) == nullptr) {
+#else
         if (fgets(buffer, sizeof(buffer), in) == nullptr) {
+#endif
           const int saved_errno = errno;
           if (feof(in))
             done = true;

From 2ecca781a15915b82333e90f92ac8c9b7d48560d Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 16 Jul 2019 23:38:05 +0000
Subject: [PATCH 306/451] Fix darwin-ld.c if dsymutil.exe exists on PATH

llvm-svn: 366282
---
 clang/test/Driver/darwin-ld.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/darwin-ld.c b/clang/test/Driver/darwin-ld.c
index f01eeb4ea28e0..eb357a9819ffc 100644
--- a/clang/test/Driver/darwin-ld.c
+++ b/clang/test/Driver/darwin-ld.c
@@ -5,9 +5,9 @@
 
 // Make sure we run dsymutil on source input files.
 // RUN: %clang -target i386-apple-darwin9 -### -g %s -o BAR 2> %t.log
-// RUN: grep '".*dsymutil" "-o" "BAR.dSYM" "BAR"' %t.log
+// RUN: grep '".*dsymutil\(.exe\)\?" "-o" "BAR.dSYM" "BAR"' %t.log
 // RUN: %clang -target i386-apple-darwin9 -### -g -filelist FOO %s -o BAR 2> %t.log
-// RUN: grep '".*dsymutil" "-o" "BAR.dSYM" "BAR"' %t.log
+// RUN: grep '".*dsymutil\(.exe\)\?" "-o" "BAR.dSYM" "BAR"' %t.log
 
 // Check linker changes that came with new linkedit format.
 // RUN: touch %t.o

From e5012ab308200792ea5e12e54a36be13380882ea Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Tue, 16 Jul 2019 23:44:21 +0000
Subject: [PATCH 307/451] [AMDGPU] Autogenerate register asm names

Differential Revision: https://reviews.llvm.org/D64839

llvm-svn: 366283
---
 .../Target/AMDGPU/AMDGPURegAsmNames.inc.cpp   | 593 ------------------
 llvm/lib/Target/AMDGPU/CMakeLists.txt         |   1 -
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h   |   4 +-
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |  79 +--
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      | 183 ++++--
 5 files changed, 139 insertions(+), 721 deletions(-)
 delete mode 100644 llvm/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
deleted file mode 100644
index eb0cb911b841f..0000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp
+++ /dev/null
@@ -1,593 +0,0 @@
-//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===//
-
-#ifdef AMDGPU_REG_ASM_NAMES
-
-static const char *const VGPR32RegNames[] = {
-    "v0",   "v1",   "v2",   "v3",   "v4",   "v5",   "v6",   "v7",   "v8",
-    "v9",   "v10",  "v11",  "v12",  "v13",  "v14",  "v15",  "v16",  "v17",
-    "v18",  "v19",  "v20",  "v21",  "v22",  "v23",  "v24",  "v25",  "v26",
-    "v27",  "v28",  "v29",  "v30",  "v31",  "v32",  "v33",  "v34",  "v35",
-    "v36",  "v37",  "v38",  "v39",  "v40",  "v41",  "v42",  "v43",  "v44",
-    "v45",  "v46",  "v47",  "v48",  "v49",  "v50",  "v51",  "v52",  "v53",
-    "v54",  "v55",  "v56",  "v57",  "v58",  "v59",  "v60",  "v61",  "v62",
-    "v63",  "v64",  "v65",  "v66",  "v67",  "v68",  "v69",  "v70",  "v71",
-    "v72",  "v73",  "v74",  "v75",  "v76",  "v77",  "v78",  "v79",  "v80",
-    "v81",  "v82",  "v83",  "v84",  "v85",  "v86",  "v87",  "v88",  "v89",
-    "v90",  "v91",  "v92",  "v93",  "v94",  "v95",  "v96",  "v97",  "v98",
-    "v99",  "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
-    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
-    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
-    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
-    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
-    "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
-    "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
-    "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
-    "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
-    "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
-    "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
-    "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
-    "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
-    "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
-    "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
-    "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
-    "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
-    "v252", "v253", "v254", "v255"
-};
-
-static const char *const SGPR32RegNames[] = {
-    "s0",   "s1",   "s2",   "s3",   "s4",  "s5",  "s6",  "s7",  "s8",  "s9",
-    "s10",  "s11",  "s12",  "s13",  "s14", "s15", "s16", "s17", "s18", "s19",
-    "s20",  "s21",  "s22",  "s23",  "s24", "s25", "s26", "s27", "s28", "s29",
-    "s30",  "s31",  "s32",  "s33",  "s34", "s35", "s36", "s37", "s38", "s39",
-    "s40",  "s41",  "s42",  "s43",  "s44", "s45", "s46", "s47", "s48", "s49",
-    "s50",  "s51",  "s52",  "s53",  "s54", "s55", "s56", "s57", "s58", "s59",
-    "s60",  "s61",  "s62",  "s63",  "s64", "s65", "s66", "s67", "s68", "s69",
-    "s70",  "s71",  "s72",  "s73",  "s74", "s75", "s76", "s77", "s78", "s79",
-    "s80",  "s81",  "s82",  "s83",  "s84", "s85", "s86", "s87", "s88", "s89",
-    "s90",  "s91",  "s92",  "s93",  "s94", "s95", "s96", "s97", "s98", "s99",
-    "s100", "s101", "s102", "s103", "s104", "s105"
-};
-
-static const char *const VGPR64RegNames[] = {
-    "v[0:1]",     "v[1:2]",     "v[2:3]",     "v[3:4]",     "v[4:5]",
-    "v[5:6]",     "v[6:7]",     "v[7:8]",     "v[8:9]",     "v[9:10]",
-    "v[10:11]",   "v[11:12]",   "v[12:13]",   "v[13:14]",   "v[14:15]",
-    "v[15:16]",   "v[16:17]",   "v[17:18]",   "v[18:19]",   "v[19:20]",
-    "v[20:21]",   "v[21:22]",   "v[22:23]",   "v[23:24]",   "v[24:25]",
-    "v[25:26]",   "v[26:27]",   "v[27:28]",   "v[28:29]",   "v[29:30]",
-    "v[30:31]",   "v[31:32]",   "v[32:33]",   "v[33:34]",   "v[34:35]",
-    "v[35:36]",   "v[36:37]",   "v[37:38]",   "v[38:39]",   "v[39:40]",
-    "v[40:41]",   "v[41:42]",   "v[42:43]",   "v[43:44]",   "v[44:45]",
-    "v[45:46]",   "v[46:47]",   "v[47:48]",   "v[48:49]",   "v[49:50]",
-    "v[50:51]",   "v[51:52]",   "v[52:53]",   "v[53:54]",   "v[54:55]",
-    "v[55:56]",   "v[56:57]",   "v[57:58]",   "v[58:59]",   "v[59:60]",
-    "v[60:61]",   "v[61:62]",   "v[62:63]",   "v[63:64]",   "v[64:65]",
-    "v[65:66]",   "v[66:67]",   "v[67:68]",   "v[68:69]",   "v[69:70]",
-    "v[70:71]",   "v[71:72]",   "v[72:73]",   "v[73:74]",   "v[74:75]",
-    "v[75:76]",   "v[76:77]",   "v[77:78]",   "v[78:79]",   "v[79:80]",
-    "v[80:81]",   "v[81:82]",   "v[82:83]",   "v[83:84]",   "v[84:85]",
-    "v[85:86]",   "v[86:87]",   "v[87:88]",   "v[88:89]",   "v[89:90]",
-    "v[90:91]",   "v[91:92]",   "v[92:93]",   "v[93:94]",   "v[94:95]",
-    "v[95:96]",   "v[96:97]",   "v[97:98]",   "v[98:99]",   "v[99:100]",
-    "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]",
-    "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]",
-    "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]",
-    "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]",
-    "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]",
-    "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]",
-    "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]",
-    "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]",
-    "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]",
-    "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]",
-    "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]",
-    "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]",
-    "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]",
-    "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]",
-    "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]",
-    "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]",
-    "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]",
-    "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]",
-    "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]",
-    "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]",
-    "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]",
-    "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]",
-    "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]",
-    "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]",
-    "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]",
-    "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]",
-    "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]",
-    "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]",
-    "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]",
-    "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]",
-    "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]"
-};
-
-static const char *const VGPR96RegNames[] = {
-    "v[0:2]",     "v[1:3]",     "v[2:4]",     "v[3:5]",     "v[4:6]",
-    "v[5:7]",     "v[6:8]",     "v[7:9]",     "v[8:10]",    "v[9:11]",
-    "v[10:12]",   "v[11:13]",   "v[12:14]",   "v[13:15]",   "v[14:16]",
-    "v[15:17]",   "v[16:18]",   "v[17:19]",   "v[18:20]",   "v[19:21]",
-    "v[20:22]",   "v[21:23]",   "v[22:24]",   "v[23:25]",   "v[24:26]",
-    "v[25:27]",   "v[26:28]",   "v[27:29]",   "v[28:30]",   "v[29:31]",
-    "v[30:32]",   "v[31:33]",   "v[32:34]",   "v[33:35]",   "v[34:36]",
-    "v[35:37]",   "v[36:38]",   "v[37:39]",   "v[38:40]",   "v[39:41]",
-    "v[40:42]",   "v[41:43]",   "v[42:44]",   "v[43:45]",   "v[44:46]",
-    "v[45:47]",   "v[46:48]",   "v[47:49]",   "v[48:50]",   "v[49:51]",
-    "v[50:52]",   "v[51:53]",   "v[52:54]",   "v[53:55]",   "v[54:56]",
-    "v[55:57]",   "v[56:58]",   "v[57:59]",   "v[58:60]",   "v[59:61]",
-    "v[60:62]",   "v[61:63]",   "v[62:64]",   "v[63:65]",   "v[64:66]",
-    "v[65:67]",   "v[66:68]",   "v[67:69]",   "v[68:70]",   "v[69:71]",
-    "v[70:72]",   "v[71:73]",   "v[72:74]",   "v[73:75]",   "v[74:76]",
-    "v[75:77]",   "v[76:78]",   "v[77:79]",   "v[78:80]",   "v[79:81]",
-    "v[80:82]",   "v[81:83]",   "v[82:84]",   "v[83:85]",   "v[84:86]",
-    "v[85:87]",   "v[86:88]",   "v[87:89]",   "v[88:90]",   "v[89:91]",
-    "v[90:92]",   "v[91:93]",   "v[92:94]",   "v[93:95]",   "v[94:96]",
-    "v[95:97]",   "v[96:98]",   "v[97:99]",   "v[98:100]",  "v[99:101]",
-    "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]",
-    "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]",
-    "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]",
-    "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]",
-    "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]",
-    "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]",
-    "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]",
-    "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]",
-    "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]",
-    "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]",
-    "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]",
-    "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]",
-    "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]",
-    "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]",
-    "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]",
-    "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]",
-    "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]",
-    "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]",
-    "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]",
-    "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]",
-    "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]",
-    "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]",
-    "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]",
-    "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]",
-    "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]",
-    "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]",
-    "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]",
-    "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]",
-    "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]",
-    "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]",
-    "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]"
-};
-
-static const char *const VGPR128RegNames[] = {
-    "v[0:3]",     "v[1:4]",     "v[2:5]",     "v[3:6]",     "v[4:7]",
-    "v[5:8]",     "v[6:9]",     "v[7:10]",    "v[8:11]",    "v[9:12]",
-    "v[10:13]",   "v[11:14]",   "v[12:15]",   "v[13:16]",   "v[14:17]",
-    "v[15:18]",   "v[16:19]",   "v[17:20]",   "v[18:21]",   "v[19:22]",
-    "v[20:23]",   "v[21:24]",   "v[22:25]",   "v[23:26]",   "v[24:27]",
-    "v[25:28]",   "v[26:29]",   "v[27:30]",   "v[28:31]",   "v[29:32]",
-    "v[30:33]",   "v[31:34]",   "v[32:35]",   "v[33:36]",   "v[34:37]",
-    "v[35:38]",   "v[36:39]",   "v[37:40]",   "v[38:41]",   "v[39:42]",
-    "v[40:43]",   "v[41:44]",   "v[42:45]",   "v[43:46]",   "v[44:47]",
-    "v[45:48]",   "v[46:49]",   "v[47:50]",   "v[48:51]",   "v[49:52]",
-    "v[50:53]",   "v[51:54]",   "v[52:55]",   "v[53:56]",   "v[54:57]",
-    "v[55:58]",   "v[56:59]",   "v[57:60]",   "v[58:61]",   "v[59:62]",
-    "v[60:63]",   "v[61:64]",   "v[62:65]",   "v[63:66]",   "v[64:67]",
-    "v[65:68]",   "v[66:69]",   "v[67:70]",   "v[68:71]",   "v[69:72]",
-    "v[70:73]",   "v[71:74]",   "v[72:75]",   "v[73:76]",   "v[74:77]",
-    "v[75:78]",   "v[76:79]",   "v[77:80]",   "v[78:81]",   "v[79:82]",
-    "v[80:83]",   "v[81:84]",   "v[82:85]",   "v[83:86]",   "v[84:87]",
-    "v[85:88]",   "v[86:89]",   "v[87:90]",   "v[88:91]",   "v[89:92]",
-    "v[90:93]",   "v[91:94]",   "v[92:95]",   "v[93:96]",   "v[94:97]",
-    "v[95:98]",   "v[96:99]",   "v[97:100]",  "v[98:101]",  "v[99:102]",
-    "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]",
-    "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]",
-    "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]",
-    "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]",
-    "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]",
-    "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]",
-    "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]",
-    "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]",
-    "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]",
-    "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]",
-    "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]",
-    "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]",
-    "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]",
-    "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]",
-    "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]",
-    "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]",
-    "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]",
-    "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]",
-    "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]",
-    "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]",
-    "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]",
-    "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]",
-    "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]",
-    "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]",
-    "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]",
-    "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]",
-    "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]",
-    "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]",
-    "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]",
-    "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]",
-    "v[250:253]", "v[251:254]", "v[252:255]"
-};
-
-static const char *const VGPR256RegNames[] = {
-    "v[0:7]",     "v[1:8]",     "v[2:9]",     "v[3:10]",    "v[4:11]",
-    "v[5:12]",    "v[6:13]",    "v[7:14]",    "v[8:15]",    "v[9:16]",
-    "v[10:17]",   "v[11:18]",   "v[12:19]",   "v[13:20]",   "v[14:21]",
-    "v[15:22]",   "v[16:23]",   "v[17:24]",   "v[18:25]",   "v[19:26]",
-    "v[20:27]",   "v[21:28]",   "v[22:29]",   "v[23:30]",   "v[24:31]",
-    "v[25:32]",   "v[26:33]",   "v[27:34]",   "v[28:35]",   "v[29:36]",
-    "v[30:37]",   "v[31:38]",   "v[32:39]",   "v[33:40]",   "v[34:41]",
-    "v[35:42]",   "v[36:43]",   "v[37:44]",   "v[38:45]",   "v[39:46]",
-    "v[40:47]",   "v[41:48]",   "v[42:49]",   "v[43:50]",   "v[44:51]",
-    "v[45:52]",   "v[46:53]",   "v[47:54]",   "v[48:55]",   "v[49:56]",
-    "v[50:57]",   "v[51:58]",   "v[52:59]",   "v[53:60]",   "v[54:61]",
-    "v[55:62]",   "v[56:63]",   "v[57:64]",   "v[58:65]",   "v[59:66]",
-    "v[60:67]",   "v[61:68]",   "v[62:69]",   "v[63:70]",   "v[64:71]",
-    "v[65:72]",   "v[66:73]",   "v[67:74]",   "v[68:75]",   "v[69:76]",
-    "v[70:77]",   "v[71:78]",   "v[72:79]",   "v[73:80]",   "v[74:81]",
-    "v[75:82]",   "v[76:83]",   "v[77:84]",   "v[78:85]",   "v[79:86]",
-    "v[80:87]",   "v[81:88]",   "v[82:89]",   "v[83:90]",   "v[84:91]",
-    "v[85:92]",   "v[86:93]",   "v[87:94]",   "v[88:95]",   "v[89:96]",
-    "v[90:97]",   "v[91:98]",   "v[92:99]",   "v[93:100]",  "v[94:101]",
-    "v[95:102]",  "v[96:103]",  "v[97:104]",  "v[98:105]",  "v[99:106]",
-    "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]",
-    "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]",
-    "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]",
-    "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]",
-    "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]",
-    "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]",
-    "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]",
-    "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]",
-    "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]",
-    "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]",
-    "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]",
-    "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]",
-    "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]",
-    "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]",
-    "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]",
-    "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]",
-    "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]",
-    "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]",
-    "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]",
-    "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]",
-    "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]",
-    "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]",
-    "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]",
-    "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]",
-    "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]",
-    "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]",
-    "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]",
-    "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]",
-    "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]",
-    "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]"
-};
-
-static const char *const VGPR512RegNames[] = {
-    "v[0:15]",    "v[1:16]",    "v[2:17]",    "v[3:18]",    "v[4:19]",
-    "v[5:20]",    "v[6:21]",    "v[7:22]",    "v[8:23]",    "v[9:24]",
-    "v[10:25]",   "v[11:26]",   "v[12:27]",   "v[13:28]",   "v[14:29]",
-    "v[15:30]",   "v[16:31]",   "v[17:32]",   "v[18:33]",   "v[19:34]",
-    "v[20:35]",   "v[21:36]",   "v[22:37]",   "v[23:38]",   "v[24:39]",
-    "v[25:40]",   "v[26:41]",   "v[27:42]",   "v[28:43]",   "v[29:44]",
-    "v[30:45]",   "v[31:46]",   "v[32:47]",   "v[33:48]",   "v[34:49]",
-    "v[35:50]",   "v[36:51]",   "v[37:52]",   "v[38:53]",   "v[39:54]",
-    "v[40:55]",   "v[41:56]",   "v[42:57]",   "v[43:58]",   "v[44:59]",
-    "v[45:60]",   "v[46:61]",   "v[47:62]",   "v[48:63]",   "v[49:64]",
-    "v[50:65]",   "v[51:66]",   "v[52:67]",   "v[53:68]",   "v[54:69]",
-    "v[55:70]",   "v[56:71]",   "v[57:72]",   "v[58:73]",   "v[59:74]",
-    "v[60:75]",   "v[61:76]",   "v[62:77]",   "v[63:78]",   "v[64:79]",
-    "v[65:80]",   "v[66:81]",   "v[67:82]",   "v[68:83]",   "v[69:84]",
-    "v[70:85]",   "v[71:86]",   "v[72:87]",   "v[73:88]",   "v[74:89]",
-    "v[75:90]",   "v[76:91]",   "v[77:92]",   "v[78:93]",   "v[79:94]",
-    "v[80:95]",   "v[81:96]",   "v[82:97]",   "v[83:98]",   "v[84:99]",
-    "v[85:100]",  "v[86:101]",  "v[87:102]",  "v[88:103]",  "v[89:104]",
-    "v[90:105]",  "v[91:106]",  "v[92:107]",  "v[93:108]",  "v[94:109]",
-    "v[95:110]",  "v[96:111]",  "v[97:112]",  "v[98:113]",  "v[99:114]",
-    "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]",
-    "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]",
-    "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]",
-    "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]",
-    "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]",
-    "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]",
-    "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]",
-    "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]",
-    "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]",
-    "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]",
-    "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]",
-    "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]",
-    "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]",
-    "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]",
-    "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]",
-    "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]",
-    "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]",
-    "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]",
-    "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]",
-    "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]",
-    "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]",
-    "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]",
-    "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]",
-    "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]",
-    "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]",
-    "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]",
-    "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]",
-    "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]",
-    "v[240:255]"
-};
-
-static const char *const SGPR64RegNames[] = {
-    "s[0:1]",   "s[2:3]",   "s[4:5]",     "s[6:7]",     "s[8:9]",   "s[10:11]",
-    "s[12:13]", "s[14:15]", "s[16:17]",   "s[18:19]",   "s[20:21]", "s[22:23]",
-    "s[24:25]", "s[26:27]", "s[28:29]",   "s[30:31]",   "s[32:33]", "s[34:35]",
-    "s[36:37]", "s[38:39]", "s[40:41]",   "s[42:43]",   "s[44:45]", "s[46:47]",
-    "s[48:49]", "s[50:51]", "s[52:53]",   "s[54:55]",   "s[56:57]", "s[58:59]",
-    "s[60:61]", "s[62:63]", "s[64:65]",   "s[66:67]",   "s[68:69]", "s[70:71]",
-    "s[72:73]", "s[74:75]", "s[76:77]",   "s[78:79]",   "s[80:81]", "s[82:83]",
-    "s[84:85]", "s[86:87]", "s[88:89]",   "s[90:91]",   "s[92:93]", "s[94:95]",
-    "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]", "s[104:105]"
-};
-
-static const char *const SGPR128RegNames[] = {
-    "s[0:3]",   "s[4:7]",     "s[8:11]",  "s[12:15]", "s[16:19]", "s[20:23]",
-    "s[24:27]", "s[28:31]",   "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]",
-    "s[48:51]", "s[52:55]",   "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]",
-    "s[72:75]", "s[76:79]",   "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]",
-    "s[96:99]", "s[100:103]"
-};
-
-static const char *const SGPR256RegNames[] = {
-    "s[0:7]",   "s[4:11]",  "s[8:15]",  "s[12:19]", "s[16:23]",
-    "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]",
-    "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]",
-    "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]",
-    "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]"
-};
-
-static const char *const SGPR512RegNames[] = {
-    "s[0:15]",  "s[4:19]",  "s[8:23]",  "s[12:27]", "s[16:31]",  "s[20:35]",
-    "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]",  "s[44:59]",
-    "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]",  "s[68:83]",
-    "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]"
-};
-
-static const char *const AGPR32RegNames[] = {
-    "a0",   "a1",   "a2",   "a3",   "a4",   "a5",   "a6",   "a7",   "a8",
-    "a9",   "a10",  "a11",  "a12",  "a13",  "a14",  "a15",  "a16",  "a17",
-    "a18",  "a19",  "a20",  "a21",  "a22",  "a23",  "a24",  "a25",  "a26",
-    "a27",  "a28",  "a29",  "a30",  "a31",  "a32",  "a33",  "a34",  "a35",
-    "a36",  "a37",  "a38",  "a39",  "a40",  "a41",  "a42",  "a43",  "a44",
-    "a45",  "a46",  "a47",  "a48",  "a49",  "a50",  "a51",  "a52",  "a53",
-    "a54",  "a55",  "a56",  "a57",  "a58",  "a59",  "a60",  "a61",  "a62",
-    "a63",  "a64",  "a65",  "a66",  "a67",  "a68",  "a69",  "a70",  "a71",
-    "a72",  "a73",  "a74",  "a75",  "a76",  "a77",  "a78",  "a79",  "a80",
-    "a81",  "a82",  "a83",  "a84",  "a85",  "a86",  "a87",  "a88",  "a89",
-    "a90",  "a91",  "a92",  "a93",  "a94",  "a95",  "a96",  "a97",  "a98",
-    "a99",  "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
-    "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
-    "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
-    "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
-    "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
-    "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
-    "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
-    "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
-    "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
-    "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
-    "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
-    "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
-    "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
-    "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
-    "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
-    "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
-    "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
-    "a252", "a253", "a254", "a255"
-};
-
-static const char *const AGPR64RegNames[] = {
-    "a[0:1]",     "a[1:2]",     "a[2:3]",     "a[3:4]",     "a[4:5]",
-    "a[5:6]",     "a[6:7]",     "a[7:8]",     "a[8:9]",     "a[9:10]",
-    "a[10:11]",   "a[11:12]",   "a[12:13]",   "a[13:14]",   "a[14:15]",
-    "a[15:16]",   "a[16:17]",   "a[17:18]",   "a[18:19]",   "a[19:20]",
-    "a[20:21]",   "a[21:22]",   "a[22:23]",   "a[23:24]",   "a[24:25]",
-    "a[25:26]",   "a[26:27]",   "a[27:28]",   "a[28:29]",   "a[29:30]",
-    "a[30:31]",   "a[31:32]",   "a[32:33]",   "a[33:34]",   "a[34:35]",
-    "a[35:36]",   "a[36:37]",   "a[37:38]",   "a[38:39]",   "a[39:40]",
-    "a[40:41]",   "a[41:42]",   "a[42:43]",   "a[43:44]",   "a[44:45]",
-    "a[45:46]",   "a[46:47]",   "a[47:48]",   "a[48:49]",   "a[49:50]",
-    "a[50:51]",   "a[51:52]",   "a[52:53]",   "a[53:54]",   "a[54:55]",
-    "a[55:56]",   "a[56:57]",   "a[57:58]",   "a[58:59]",   "a[59:60]",
-    "a[60:61]",   "a[61:62]",   "a[62:63]",   "a[63:64]",   "a[64:65]",
-    "a[65:66]",   "a[66:67]",   "a[67:68]",   "a[68:69]",   "a[69:70]",
-    "a[70:71]",   "a[71:72]",   "a[72:73]",   "a[73:74]",   "a[74:75]",
-    "a[75:76]",   "a[76:77]",   "a[77:78]",   "a[78:79]",   "a[79:80]",
-    "a[80:81]",   "a[81:82]",   "a[82:83]",   "a[83:84]",   "a[84:85]",
-    "a[85:86]",   "a[86:87]",   "a[87:88]",   "a[88:89]",   "a[89:90]",
-    "a[90:91]",   "a[91:92]",   "a[92:93]",   "a[93:94]",   "a[94:95]",
-    "a[95:96]",   "a[96:97]",   "a[97:98]",   "a[98:99]",   "a[99:100]",
-    "a[100:101]", "a[101:102]", "a[102:103]", "a[103:104]", "a[104:105]",
-    "a[105:106]", "a[106:107]", "a[107:108]", "a[108:109]", "a[109:110]",
-    "a[110:111]", "a[111:112]", "a[112:113]", "a[113:114]", "a[114:115]",
-    "a[115:116]", "a[116:117]", "a[117:118]", "a[118:119]", "a[119:120]",
-    "a[120:121]", "a[121:122]", "a[122:123]", "a[123:124]", "a[124:125]",
-    "a[125:126]", "a[126:127]", "a[127:128]", "a[128:129]", "a[129:130]",
-    "a[130:131]", "a[131:132]", "a[132:133]", "a[133:134]", "a[134:135]",
-    "a[135:136]", "a[136:137]", "a[137:138]", "a[138:139]", "a[139:140]",
-    "a[140:141]", "a[141:142]", "a[142:143]", "a[143:144]", "a[144:145]",
-    "a[145:146]", "a[146:147]", "a[147:148]", "a[148:149]", "a[149:150]",
-    "a[150:151]", "a[151:152]", "a[152:153]", "a[153:154]", "a[154:155]",
-    "a[155:156]", "a[156:157]", "a[157:158]", "a[158:159]", "a[159:160]",
-    "a[160:161]", "a[161:162]", "a[162:163]", "a[163:164]", "a[164:165]",
-    "a[165:166]", "a[166:167]", "a[167:168]", "a[168:169]", "a[169:170]",
-    "a[170:171]", "a[171:172]", "a[172:173]", "a[173:174]", "a[174:175]",
-    "a[175:176]", "a[176:177]", "a[177:178]", "a[178:179]", "a[179:180]",
-    "a[180:181]", "a[181:182]", "a[182:183]", "a[183:184]", "a[184:185]",
-    "a[185:186]", "a[186:187]", "a[187:188]", "a[188:189]", "a[189:190]",
-    "a[190:191]", "a[191:192]", "a[192:193]", "a[193:194]", "a[194:195]",
-    "a[195:196]", "a[196:197]", "a[197:198]", "a[198:199]", "a[199:200]",
-    "a[200:201]", "a[201:202]", "a[202:203]", "a[203:204]", "a[204:205]",
-    "a[205:206]", "a[206:207]", "a[207:208]", "a[208:209]", "a[209:210]",
-    "a[210:211]", "a[211:212]", "a[212:213]", "a[213:214]", "a[214:215]",
-    "a[215:216]", "a[216:217]", "a[217:218]", "a[218:219]", "a[219:220]",
-    "a[220:221]", "a[221:222]", "a[222:223]", "a[223:224]", "a[224:225]",
-    "a[225:226]", "a[226:227]", "a[227:228]", "a[228:229]", "a[229:230]",
-    "a[230:231]", "a[231:232]", "a[232:233]", "a[233:234]", "a[234:235]",
-    "a[235:236]", "a[236:237]", "a[237:238]", "a[238:239]", "a[239:240]",
-    "a[240:241]", "a[241:242]", "a[242:243]", "a[243:244]", "a[244:245]",
-    "a[245:246]", "a[246:247]", "a[247:248]", "a[248:249]", "a[249:250]",
-    "a[250:251]", "a[251:252]", "a[252:253]", "a[253:254]", "a[254:255]"
-};
-
-static const char *const AGPR128RegNames[] = {
-    "a[0:3]",     "a[1:4]",     "a[2:5]",     "a[3:6]",     "a[4:7]",
-    "a[5:8]",     "a[6:9]",     "a[7:10]",    "a[8:11]",    "a[9:12]",
-    "a[10:13]",   "a[11:14]",   "a[12:15]",   "a[13:16]",   "a[14:17]",
-    "a[15:18]",   "a[16:19]",   "a[17:20]",   "a[18:21]",   "a[19:22]",
-    "a[20:23]",   "a[21:24]",   "a[22:25]",   "a[23:26]",   "a[24:27]",
-    "a[25:28]",   "a[26:29]",   "a[27:30]",   "a[28:31]",   "a[29:32]",
-    "a[30:33]",   "a[31:34]",   "a[32:35]",   "a[33:36]",   "a[34:37]",
-    "a[35:38]",   "a[36:39]",   "a[37:40]",   "a[38:41]",   "a[39:42]",
-    "a[40:43]",   "a[41:44]",   "a[42:45]",   "a[43:46]",   "a[44:47]",
-    "a[45:48]",   "a[46:49]",   "a[47:50]",   "a[48:51]",   "a[49:52]",
-    "a[50:53]",   "a[51:54]",   "a[52:55]",   "a[53:56]",   "a[54:57]",
-    "a[55:58]",   "a[56:59]",   "a[57:60]",   "a[58:61]",   "a[59:62]",
-    "a[60:63]",   "a[61:64]",   "a[62:65]",   "a[63:66]",   "a[64:67]",
-    "a[65:68]",   "a[66:69]",   "a[67:70]",   "a[68:71]",   "a[69:72]",
-    "a[70:73]",   "a[71:74]",   "a[72:75]",   "a[73:76]",   "a[74:77]",
-    "a[75:78]",   "a[76:79]",   "a[77:80]",   "a[78:81]",   "a[79:82]",
-    "a[80:83]",   "a[81:84]",   "a[82:85]",   "a[83:86]",   "a[84:87]",
-    "a[85:88]",   "a[86:89]",   "a[87:90]",   "a[88:91]",   "a[89:92]",
-    "a[90:93]",   "a[91:94]",   "a[92:95]",   "a[93:96]",   "a[94:97]",
-    "a[95:98]",   "a[96:99]",   "a[97:100]",  "a[98:101]",  "a[99:102]",
-    "a[100:103]", "a[101:104]", "a[102:105]", "a[103:106]", "a[104:107]",
-    "a[105:108]", "a[106:109]", "a[107:110]", "a[108:111]", "a[109:112]",
-    "a[110:113]", "a[111:114]", "a[112:115]", "a[113:116]", "a[114:117]",
-    "a[115:118]", "a[116:119]", "a[117:120]", "a[118:121]", "a[119:122]",
-    "a[120:123]", "a[121:124]", "a[122:125]", "a[123:126]", "a[124:127]",
-    "a[125:128]", "a[126:129]", "a[127:130]", "a[128:131]", "a[129:132]",
-    "a[130:133]", "a[131:134]", "a[132:135]", "a[133:136]", "a[134:137]",
-    "a[135:138]", "a[136:139]", "a[137:140]", "a[138:141]", "a[139:142]",
-    "a[140:143]", "a[141:144]", "a[142:145]", "a[143:146]", "a[144:147]",
-    "a[145:148]", "a[146:149]", "a[147:150]", "a[148:151]", "a[149:152]",
-    "a[150:153]", "a[151:154]", "a[152:155]", "a[153:156]", "a[154:157]",
-    "a[155:158]", "a[156:159]", "a[157:160]", "a[158:161]", "a[159:162]",
-    "a[160:163]", "a[161:164]", "a[162:165]", "a[163:166]", "a[164:167]",
-    "a[165:168]", "a[166:169]", "a[167:170]", "a[168:171]", "a[169:172]",
-    "a[170:173]", "a[171:174]", "a[172:175]", "a[173:176]", "a[174:177]",
-    "a[175:178]", "a[176:179]", "a[177:180]", "a[178:181]", "a[179:182]",
-    "a[180:183]", "a[181:184]", "a[182:185]", "a[183:186]", "a[184:187]",
-    "a[185:188]", "a[186:189]", "a[187:190]", "a[188:191]", "a[189:192]",
-    "a[190:193]", "a[191:194]", "a[192:195]", "a[193:196]", "a[194:197]",
-    "a[195:198]", "a[196:199]", "a[197:200]", "a[198:201]", "a[199:202]",
-    "a[200:203]", "a[201:204]", "a[202:205]", "a[203:206]", "a[204:207]",
-    "a[205:208]", "a[206:209]", "a[207:210]", "a[208:211]", "a[209:212]",
-    "a[210:213]", "a[211:214]", "a[212:215]", "a[213:216]", "a[214:217]",
-    "a[215:218]", "a[216:219]", "a[217:220]", "a[218:221]", "a[219:222]",
-    "a[220:223]", "a[221:224]", "a[222:225]", "a[223:226]", "a[224:227]",
-    "a[225:228]", "a[226:229]", "a[227:230]", "a[228:231]", "a[229:232]",
-    "a[230:233]", "a[231:234]", "a[232:235]", "a[233:236]", "a[234:237]",
-    "a[235:238]", "a[236:239]", "a[237:240]", "a[238:241]", "a[239:242]",
-    "a[240:243]", "a[241:244]", "a[242:245]", "a[243:246]", "a[244:247]",
-    "a[245:248]", "a[246:249]", "a[247:250]", "a[248:251]", "a[249:252]",
-    "a[250:253]", "a[251:254]", "a[252:255]"
-};
-
-static const char *const AGPR512RegNames[] = {
-    "a[0:15]",    "a[1:16]",    "a[2:17]",    "a[3:18]",    "a[4:19]",
-    "a[5:20]",    "a[6:21]",    "a[7:22]",    "a[8:23]",    "a[9:24]",
-    "a[10:25]",   "a[11:26]",   "a[12:27]",   "a[13:28]",   "a[14:29]",
-    "a[15:30]",   "a[16:31]",   "a[17:32]",   "a[18:33]",   "a[19:34]",
-    "a[20:35]",   "a[21:36]",   "a[22:37]",   "a[23:38]",   "a[24:39]",
-    "a[25:40]",   "a[26:41]",   "a[27:42]",   "a[28:43]",   "a[29:44]",
-    "a[30:45]",   "a[31:46]",   "a[32:47]",   "a[33:48]",   "a[34:49]",
-    "a[35:50]",   "a[36:51]",   "a[37:52]",   "a[38:53]",   "a[39:54]",
-    "a[40:55]",   "a[41:56]",   "a[42:57]",   "a[43:58]",   "a[44:59]",
-    "a[45:60]",   "a[46:61]",   "a[47:62]",   "a[48:63]",   "a[49:64]",
-    "a[50:65]",   "a[51:66]",   "a[52:67]",   "a[53:68]",   "a[54:69]",
-    "a[55:70]",   "a[56:71]",   "a[57:72]",   "a[58:73]",   "a[59:74]",
-    "a[60:75]",   "a[61:76]",   "a[62:77]",   "a[63:78]",   "a[64:79]",
-    "a[65:80]",   "a[66:81]",   "a[67:82]",   "a[68:83]",   "a[69:84]",
-    "a[70:85]",   "a[71:86]",   "a[72:87]",   "a[73:88]",   "a[74:89]",
-    "a[75:90]",   "a[76:91]",   "a[77:92]",   "a[78:93]",   "a[79:94]",
-    "a[80:95]",   "a[81:96]",   "a[82:97]",   "a[83:98]",   "a[84:99]",
-    "a[85:100]",  "a[86:101]",  "a[87:102]",  "a[88:103]",  "a[89:104]",
-    "a[90:105]",  "a[91:106]",  "a[92:107]",  "a[93:108]",  "a[94:109]",
-    "a[95:110]",  "a[96:111]",  "a[97:112]",  "a[98:113]",  "a[99:114]",
-    "a[100:115]", "a[101:116]", "a[102:117]", "a[103:118]", "a[104:119]",
-    "a[105:120]", "a[106:121]", "a[107:122]", "a[108:123]", "a[109:124]",
-    "a[110:125]", "a[111:126]", "a[112:127]", "a[113:128]", "a[114:129]",
-    "a[115:130]", "a[116:131]", "a[117:132]", "a[118:133]", "a[119:134]",
-    "a[120:135]", "a[121:136]", "a[122:137]", "a[123:138]", "a[124:139]",
-    "a[125:140]", "a[126:141]", "a[127:142]", "a[128:143]", "a[129:144]",
-    "a[130:145]", "a[131:146]", "a[132:147]", "a[133:148]", "a[134:149]",
-    "a[135:150]", "a[136:151]", "a[137:152]", "a[138:153]", "a[139:154]",
-    "a[140:155]", "a[141:156]", "a[142:157]", "a[143:158]", "a[144:159]",
-    "a[145:160]", "a[146:161]", "a[147:162]", "a[148:163]", "a[149:164]",
-    "a[150:165]", "a[151:166]", "a[152:167]", "a[153:168]", "a[154:169]",
-    "a[155:170]", "a[156:171]", "a[157:172]", "a[158:173]", "a[159:174]",
-    "a[160:175]", "a[161:176]", "a[162:177]", "a[163:178]", "a[164:179]",
-    "a[165:180]", "a[166:181]", "a[167:182]", "a[168:183]", "a[169:184]",
-    "a[170:185]", "a[171:186]", "a[172:187]", "a[173:188]", "a[174:189]",
-    "a[175:190]", "a[176:191]", "a[177:192]", "a[178:193]", "a[179:194]",
-    "a[180:195]", "a[181:196]", "a[182:197]", "a[183:198]", "a[184:199]",
-    "a[185:200]", "a[186:201]", "a[187:202]", "a[188:203]", "a[189:204]",
-    "a[190:205]", "a[191:206]", "a[192:207]", "a[193:208]", "a[194:209]",
-    "a[195:210]", "a[196:211]", "a[197:212]", "a[198:213]", "a[199:214]",
-    "a[200:215]", "a[201:216]", "a[202:217]", "a[203:218]", "a[204:219]",
-    "a[205:220]", "a[206:221]", "a[207:222]", "a[208:223]", "a[209:224]",
-    "a[210:225]", "a[211:226]", "a[212:227]", "a[213:228]", "a[214:229]",
-    "a[215:230]", "a[216:231]", "a[217:232]", "a[218:233]", "a[219:234]",
-    "a[220:235]", "a[221:236]", "a[222:237]", "a[223:238]", "a[224:239]",
-    "a[225:240]", "a[226:241]", "a[227:242]", "a[228:243]", "a[229:244]",
-    "a[230:245]", "a[231:246]", "a[232:247]", "a[233:248]", "a[234:249]",
-    "a[235:250]", "a[236:251]", "a[237:252]", "a[238:253]", "a[239:254]",
-    "a[240:255]"
-};
-
-static const char *const AGPR1024RegNames[] = {
-    "a[0:31]", "a[1:32]", "a[2:33]", "a[3:34]", "a[4:35]",
-    "a[5:36]", "a[6:37]", "a[7:38]", "a[8:39]", "a[9:40]",
-    "a[10:41]", "a[11:42]", "a[12:43]", "a[13:44]", "a[14:45]",
-    "a[15:46]", "a[16:47]", "a[17:48]", "a[18:49]", "a[19:50]",
-    "a[20:51]", "a[21:52]", "a[22:53]", "a[23:54]", "a[24:55]",
-    "a[25:56]", "a[26:57]", "a[27:58]", "a[28:59]", "a[29:60]",
-    "a[30:61]", "a[31:62]", "a[32:63]", "a[33:64]", "a[34:65]",
-    "a[35:66]", "a[36:67]", "a[37:68]", "a[38:69]", "a[39:70]",
-    "a[40:71]", "a[41:72]", "a[42:73]", "a[43:74]", "a[44:75]",
-    "a[45:76]", "a[46:77]", "a[47:78]", "a[48:79]", "a[49:80]",
-    "a[50:81]", "a[51:82]", "a[52:83]", "a[53:84]", "a[54:85]",
-    "a[55:86]", "a[56:87]", "a[57:88]", "a[58:89]", "a[59:90]",
-    "a[60:91]", "a[61:92]", "a[62:93]", "a[63:94]", "a[64:95]",
-    "a[65:96]", "a[66:97]", "a[67:98]", "a[68:99]", "a[69:100]",
-    "a[70:101]", "a[71:102]", "a[72:103]", "a[73:104]", "a[74:105]",
-    "a[75:106]", "a[76:107]", "a[77:108]", "a[78:109]", "a[79:110]",
-    "a[80:111]", "a[81:112]", "a[82:113]", "a[83:114]", "a[84:115]",
-    "a[85:116]", "a[86:117]", "a[87:118]", "a[88:119]", "a[89:120]",
-    "a[90:121]", "a[91:122]", "a[92:123]", "a[93:124]", "a[94:125]",
-    "a[95:126]", "a[96:127]", "a[97:128]", "a[98:129]", "a[99:130]",
-    "a[100:131]", "a[101:132]", "a[102:133]", "a[103:134]", "a[104:135]",
-    "a[105:136]", "a[106:137]", "a[107:138]", "a[108:139]", "a[109:140]",
-    "a[110:141]", "a[111:142]", "a[112:143]", "a[113:144]", "a[114:145]",
-    "a[115:146]", "a[116:147]", "a[117:148]", "a[118:149]", "a[119:150]",
-    "a[120:151]", "a[121:152]", "a[122:153]", "a[123:154]", "a[124:155]",
-    "a[125:156]", "a[126:157]", "a[127:158]", "a[128:159]", "a[129:160]",
-    "a[130:161]", "a[131:162]", "a[132:163]", "a[133:164]", "a[134:165]",
-    "a[135:166]", "a[136:167]", "a[137:168]", "a[138:169]", "a[139:170]",
-    "a[140:171]", "a[141:172]", "a[142:173]", "a[143:174]", "a[144:175]",
-    "a[145:176]", "a[146:177]", "a[147:178]", "a[148:179]", "a[149:180]",
-    "a[150:181]", "a[151:182]", "a[152:183]", "a[153:184]", "a[154:185]",
-    "a[155:186]", "a[156:187]", "a[157:188]", "a[158:189]", "a[159:190]",
-    "a[160:191]", "a[161:192]", "a[162:193]", "a[163:194]", "a[164:195]",
-    "a[165:196]", "a[166:197]", "a[167:198]", "a[168:199]", "a[169:200]",
-    "a[170:201]", "a[171:202]", "a[172:203]", "a[173:204]", "a[174:205]",
-    "a[175:206]", "a[176:207]", "a[177:208]", "a[178:209]", "a[179:210]",
-    "a[180:211]", "a[181:212]", "a[182:213]", "a[183:214]", "a[184:215]",
-    "a[185:216]", "a[186:217]", "a[187:218]", "a[188:219]", "a[189:220]",
-    "a[190:221]", "a[191:222]", "a[192:223]", "a[193:224]", "a[194:225]",
-    "a[195:226]", "a[196:227]", "a[197:228]", "a[198:229]", "a[199:230]",
-    "a[200:231]", "a[201:232]", "a[202:233]", "a[203:234]", "a[204:235]",
-    "a[205:236]", "a[206:237]", "a[207:238]", "a[208:239]", "a[209:240]",
-    "a[210:241]", "a[211:242]", "a[212:243]", "a[213:244]", "a[214:245]",
-    "a[215:246]", "a[216:247]", "a[217:248]", "a[218:249]", "a[219:250]",
-    "a[220:251]", "a[221:252]", "a[222:253]", "a[223:254]", "a[224:255]"
-};
-
-#endif
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 5dbb63dea467f..ab82ae4a6653d 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -59,7 +59,6 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUOpenCLEnqueuedBlockLowering.cpp
   AMDGPUPromoteAlloca.cpp
   AMDGPUPropagateAttributes.cpp
-  AMDGPURegAsmNames.inc.cpp
   AMDGPURegisterBankInfo.cpp
   AMDGPURegisterInfo.cpp
   AMDGPURewriteOutArguments.cpp
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 0f62f039763ef..b544d1ef36053 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -12,6 +12,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H
 
+#include "AMDGPUMCTargetDesc.h"
 #include "llvm/MC/MCInstPrinter.h"
 
 namespace llvm {
@@ -25,7 +26,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
   //Autogenerated by tblgen
   void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
                         raw_ostream &O);
-  static const char *getRegisterName(unsigned RegNo);
+  static const char *getRegisterName(unsigned RegNo,
+                                     unsigned AltIdx = AMDGPU::NoRegAltName);
 
   void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
                  const MCSubtargetInfo &STI) override;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 7c2839ccb4c09..483793fe4dcbb 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -16,6 +16,7 @@
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
+#include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -1346,65 +1347,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 }
 
 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
-  #define AMDGPU_REG_ASM_NAMES
-  #include "AMDGPURegAsmNames.inc.cpp"
-
-  #define REG_RANGE(BeginReg, EndReg, RegTable)            \
-    if (Reg >= BeginReg && Reg <= EndReg) {                \
-      unsigned Index = Reg - BeginReg;                     \
-      assert(Index < array_lengthof(RegTable));            \
-      return RegTable[Index];                              \
-    }
-
-  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
-  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR105, SGPR32RegNames);
-  REG_RANGE(AMDGPU::AGPR0, AMDGPU::AGPR255, AGPR32RegNames);
-  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
-  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR104_SGPR105, SGPR64RegNames);
-  REG_RANGE(AMDGPU::AGPR0_AGPR1, AMDGPU::AGPR254_AGPR255, AGPR64RegNames);
-  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
-            VGPR96RegNames);
-
-  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
-            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
-            VGPR128RegNames);
-  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
-            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
-            SGPR128RegNames);
-  REG_RANGE(AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3,
-            AMDGPU::AGPR252_AGPR253_AGPR254_AGPR255,
-            AGPR128RegNames);
-
-  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
-            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
-            VGPR256RegNames);
-
-  REG_RANGE(
-    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
-    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
-    VGPR512RegNames);
-  REG_RANGE(
-    AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3_AGPR4_AGPR5_AGPR6_AGPR7_AGPR8_AGPR9_AGPR10_AGPR11_AGPR12_AGPR13_AGPR14_AGPR15,
-    AMDGPU::AGPR240_AGPR241_AGPR242_AGPR243_AGPR244_AGPR245_AGPR246_AGPR247_AGPR248_AGPR249_AGPR250_AGPR251_AGPR252_AGPR253_AGPR254_AGPR255,
-    AGPR512RegNames);
-
-  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
-            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
-            SGPR256RegNames);
-
-  REG_RANGE(
-    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
-    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
-    SGPR512RegNames
-  );
-
-  REG_RANGE(
-    AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3_AGPR4_AGPR5_AGPR6_AGPR7_AGPR8_AGPR9_AGPR10_AGPR11_AGPR12_AGPR13_AGPR14_AGPR15_AGPR16_AGPR17_AGPR18_AGPR19_AGPR20_AGPR21_AGPR22_AGPR23_AGPR24_AGPR25_AGPR26_AGPR27_AGPR28_AGPR29_AGPR30_AGPR31,
-    AMDGPU::AGPR224_AGPR225_AGPR226_AGPR227_AGPR228_AGPR229_AGPR230_AGPR231_AGPR232_AGPR233_AGPR234_AGPR235_AGPR236_AGPR237_AGPR238_AGPR239_AGPR240_AGPR241_AGPR242_AGPR243_AGPR244_AGPR245_AGPR246_AGPR247_AGPR248_AGPR249_AGPR250_AGPR251_AGPR252_AGPR253_AGPR254_AGPR255,
-    AGPR1024RegNames);
-
-#undef REG_RANGE
-
   // FIXME: Rename flat_scr so we don't need to special case this.
   switch (Reg) {
   case AMDGPU::FLAT_SCR:
@@ -1414,9 +1356,24 @@ StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
   case AMDGPU::FLAT_SCR_HI:
     return "flat_scratch_hi";
   default:
-    // For the special named registers the default is fine.
-    return TargetRegisterInfo::getRegAsmName(Reg);
+    break;
+  }
+
+  const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
+  unsigned Size = getRegSizeInBits(*RC);
+  unsigned AltName = AMDGPU::NoRegAltName;
+
+  switch (Size) {
+  case 32:   AltName = AMDGPU::Reg32; break;
+  case 64:   AltName = AMDGPU::Reg64; break;
+  case 96:   AltName = AMDGPU::Reg96; break;
+  case 128:  AltName = AMDGPU::Reg128; break;
+  case 160:  AltName = AMDGPU::Reg160; break;
+  case 256:  AltName = AMDGPU::Reg256; break;
+  case 512:  AltName = AMDGPU::Reg512; break;
+  case 1024: AltName = AMDGPU::Reg1024; break;
   }
+  return AMDGPUInstPrinter::getRegisterName(Reg, AltName);
 }
 
 // FIXME: This is very slow. It might be worth creating a map from physreg to
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 4767f3c30ed32..353347073b877 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -37,31 +37,63 @@ class getSubRegs<int size> {
                                               !if(!eq(size, 16), ret16, ret32))))));
 }
 
+let Namespace = "AMDGPU" in {
+defset list<RegAltNameIndex> AllRegAltNameIndices = {
+  def Reg32   : RegAltNameIndex;
+  def Reg64   : RegAltNameIndex;
+  def Reg96   : RegAltNameIndex;
+  def Reg128  : RegAltNameIndex;
+  def Reg160  : RegAltNameIndex;
+  def Reg256  : RegAltNameIndex;
+  def Reg512  : RegAltNameIndex;
+  def Reg1024 : RegAltNameIndex;
+}
+}
+
 //===----------------------------------------------------------------------===//
 //  Declarations that describe the SI registers
 //===----------------------------------------------------------------------===//
-class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
+class SIReg <string n, bits<16> regIdx = 0, string prefix = "",
+             int regNo = !cast<int>(regIdx)> :
+  Register<n, !if(!eq(prefix, ""),
+                [ n, n, n, n, n, n, n, n ],
+                [ prefix # regNo,
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 1), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 2), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 3), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 4), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 7), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 15), 255) # "]",
+                  prefix # "[" # regNo # ":" # !and(!add(regNo, 31), 255) # "]",
+                ])>,
   DwarfRegNum<[!cast<int>(HWEncoding)]> {
   let Namespace = "AMDGPU";
+  let RegAltNameIndices = AllRegAltNameIndices;
 
   // This is the not yet the complete register encoding. An additional
   // bit is set for VGPRs.
   let HWEncoding = regIdx;
 }
 
+class SIRegisterWithSubRegs<string n, list<Register> subregs> :
+  RegisterWithSubRegs<n, subregs> {
+  let RegAltNameIndices = AllRegAltNameIndices;
+  let AltNames = [ n, n, n, n, n, n, n, n ];
+}
+
 // Special Registers
 def VCC_LO : SIReg<"vcc_lo", 106>;
 def VCC_HI : SIReg<"vcc_hi", 107>;
 
 // Pseudo-registers: Used as placeholders during isel and immediately
 // replaced, never seeing the verifier.
-def PRIVATE_RSRC_REG : SIReg<"", 0>;
-def FP_REG : SIReg<"", 0>;
-def SP_REG : SIReg<"", 0>;
-def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>;
+def PRIVATE_RSRC_REG : SIReg<"private_rsrc", 0>;
+def FP_REG : SIReg<"fp", 0>;
+def SP_REG : SIReg<"sp", 0>;
+def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>;
 
 // VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
           DwarfRegAlias<VCC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -71,7 +103,7 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
 def EXEC_LO : SIReg<"exec_lo", 126>;
 def EXEC_HI : SIReg<"exec_hi", 127>;
 
-def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
+def EXEC : SIRegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
            DwarfRegAlias<EXEC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -86,7 +118,7 @@ def SRC_SCC : SIReg<"src_scc", 253>;
 
 // 1-bit pseudo register, for codegen only.
 // Should never be emitted.
-def SCC : SIReg<"">;
+def SCC : SIReg<"scc">;
 
 def M0 : SIReg <"m0", 124>;
 def SGPR_NULL : SIReg<"null", 125>;
@@ -102,7 +134,7 @@ def LDS_DIRECT : SIReg <"lds_direct", 254>;
 def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
 def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
 
-def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
+def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>,
                  DwarfRegAlias<XNACK_MASK_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -113,7 +145,7 @@ def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI
 def TBA_LO : SIReg<"tba_lo", 108>;
 def TBA_HI : SIReg<"tba_hi", 109>;
 
-def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
+def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
           DwarfRegAlias<TBA_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -123,7 +155,7 @@ def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
 def TMA_LO : SIReg<"tma_lo", 110>;
 def TMA_HI : SIReg<"tma_hi", 111>;
 
-def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
+def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
           DwarfRegAlias<TMA_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -133,7 +165,7 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
 foreach Index = 0-15 in {
   def TTMP#Index#_vi         : SIReg<"ttmp"#Index, !add(112, Index)>;
   def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
-  def TTMP#Index             : SIReg<"", 0>;
+  def TTMP#Index             : SIReg<"ttmp"#Index, 0>;
 }
 
 multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
@@ -143,7 +175,7 @@ multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
 }
 
 class FlatReg <Register lo, Register hi, bits<16> encoding> :
-    RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+    SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>,
     DwarfRegAlias<lo> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -159,19 +191,19 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
 
 // SGPR registers
 foreach Index = 0-105 in {
-  def SGPR#Index : SIReg <"SGPR"#Index, Index>;
+  def SGPR#Index : SIReg <"SGPR"#Index, Index, "S">;
 }
 
 // VGPR registers
 foreach Index = 0-255 in {
-  def VGPR#Index : SIReg <"VGPR"#Index, Index> {
+  def VGPR#Index : SIReg <"VGPR"#Index, Index, "V"> {
     let HWEncoding{8} = 1;
   }
 }
 
 // AccVGPR registers
 foreach Index = 0-255 in {
-  def AGPR#Index : SIReg <"AGPR"#Index, Index> {
+  def AGPR#Index : SIReg <"AGPR"#Index, Index, "A"> {
     let HWEncoding{8} = 1;
   }
 }
@@ -194,7 +226,7 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
 
 // SGPR 32-bit registers
 def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                            (add (sequence "SGPR%u", 0, 105))> {
+                            (add (sequence "SGPR%u", 0, 105)), Reg32> {
   // Give all SGPR classes higher priority than VGPR classes, because
   // we want to spill SGPRs to VGPRs.
   let AllocationPriority = 9;
@@ -342,7 +374,7 @@ class TmpRegTuplesBase<int index, int size,
                        list<SubRegIndex> indices = getSubRegs<size>.ret,
                        int index1 = !add(index, !add(size, -1)),
                        string name = "ttmp["#index#":"#index1#"]"> :
-  RegisterWithSubRegs<name, subRegs> {
+  SIRegisterWithSubRegs<name, subRegs> {
   let HWEncoding = subRegs[0].HWEncoding;
   let SubRegIndices = indices;
 }
@@ -419,7 +451,7 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT
 // VGPR 32-bit registers
 // i16/f16 only on VI+
 def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                            (add (sequence "VGPR%u", 0, 255))> {
+                            (add (sequence "VGPR%u", 0, 255)), Reg32> {
   let AllocationPriority = 1;
   let Size = 32;
 }
@@ -517,7 +549,7 @@ def VGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
 
 // AccVGPR 32-bit registers
 def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                            (add (sequence "AGPR%u", 0, 255))> {
+                            (add (sequence "AGPR%u", 0, 255)), Reg32> {
   let AllocationPriority = 1;
   let Size = 32;
 }
@@ -593,19 +625,19 @@ def AGPR_1024 : RegisterTuples<getSubRegs<32>.ret,
 //===----------------------------------------------------------------------===//
 
 def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-  (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG)> {
+  (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG), Reg32> {
   let isAllocatable = 0;
   let CopyCost = -1;
 }
 
 def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
-  (add PRIVATE_RSRC_REG)> {
+  (add PRIVATE_RSRC_REG), Reg128> {
   let isAllocatable = 0;
   let CopyCost = -1;
 }
 
 def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-  (add LDS_DIRECT)> {
+  (add LDS_DIRECT), Reg32> {
   let isAllocatable = 0;
   let CopyCost = -1;
 }
@@ -616,54 +648,58 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1
   (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
    SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
    SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
-   SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
+   SRC_VCCZ, SRC_EXECZ, SRC_SCC), Reg32> {
   let AllocationPriority = 10;
 }
 
 def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
-  (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
+  (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS), Reg32> {
   let AllocationPriority = 10;
 }
 
 def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
-  (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
+  (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI), Reg32> {
   let AllocationPriority = 10;
 }
 
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
-  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
+  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI), Reg32> {
   let AllocationPriority = 10;
 }
 
 def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
-  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> {
+  (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS),
+  Reg32> {
   let isAllocatable = 0;
 }
 
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> {
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
+                            (add SGPR_64Regs), Reg64> {
   let CopyCost = 1;
   let AllocationPriority = 11;
 }
 
 // CCR (call clobbered registers) SGPR 64-bit registers
-def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, (add (trunc SGPR_64, 16))> {
+def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+                                (add (trunc SGPR_64, 16)), Reg64> {
   let CopyCost = SGPR_64.CopyCost;
   let AllocationPriority = SGPR_64.AllocationPriority;
 }
 
-def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> {
+def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
+                            (add TTMP_64Regs)> {
   let isAllocatable = 0;
 }
 
 def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
-  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
+  (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> {
   let CopyCost = 1;
   let AllocationPriority = 13;
 }
 
 def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
-  (add SReg_64_XEXEC, EXEC)> {
+  (add SReg_64_XEXEC, EXEC), Reg64> {
   let CopyCost = 1;
   let AllocationPriority = 13;
 }
@@ -686,25 +722,27 @@ let CopyCost = 2 in {
 // There are no 3-component scalar instructions, but this is needed
 // for symmetry with VGPRs.
 def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
-  (add SGPR_96Regs)> {
+  (add SGPR_96Regs), Reg96> {
   let AllocationPriority = 14;
 }
 
 def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
-  (add SGPR_96)> {
+  (add SGPR_96), Reg96> {
   let AllocationPriority = 14;
 }
 
-def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add SGPR_128Regs)> {
+def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
+                             (add SGPR_128Regs), Reg128> {
   let AllocationPriority = 15;
 }
 
-def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add TTMP_128Regs)> {
+def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
+                             (add TTMP_128Regs)> {
   let isAllocatable = 0;
 }
 
 def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
-  (add SGPR_128, TTMP_128)> {
+                             (add SGPR_128, TTMP_128), Reg128> {
   let AllocationPriority = 15;
 }
 
@@ -713,16 +751,17 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
 // There are no 5-component scalar instructions, but this is needed
 // for symmetry with VGPRs.
 def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
-  (add SGPR_160Regs)> {
+                             (add SGPR_160Regs), Reg160> {
   let AllocationPriority = 16;
 }
 
 def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
-  (add SGPR_160)> {
+                             (add SGPR_160), Reg160> {
   let AllocationPriority = 16;
 }
 
-def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
+def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs),
+                             Reg256> {
   let AllocationPriority = 17;
 }
 
@@ -731,44 +770,48 @@ def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
 }
 
 def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
-  (add SGPR_256, TTMP_256)> {
+                             (add SGPR_256, TTMP_256), Reg256> {
   // Requires 4 s_mov_b64 to copy
   let CopyCost = 4;
   let AllocationPriority = 17;
 }
 
-def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> {
+def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+                             (add SGPR_512Regs), Reg512> {
   let AllocationPriority = 18;
 }
 
-def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> {
+def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+                             (add TTMP_512Regs)> {
   let isAllocatable = 0;
 }
 
 def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
-  (add SGPR_512, TTMP_512)> {
+                             (add SGPR_512, TTMP_512), Reg512> {
   // Requires 8 s_mov_b64 to copy
   let CopyCost = 8;
   let AllocationPriority = 18;
 }
 
 def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                                 (add VGPR_32, LDS_DIRECT_CLASS)> {
+                                 (add VGPR_32, LDS_DIRECT_CLASS), Reg32> {
   let isAllocatable = 0;
 }
 
-def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add SGPR_1024Regs)> {
+def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+                              (add SGPR_1024Regs), Reg1024> {
   let AllocationPriority = 19;
 }
 
 def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
-  (add SGPR_1024)> {
+                              (add SGPR_1024), Reg1024> {
   let CopyCost = 16;
   let AllocationPriority = 19;
 }
 
 // Register class for all vector registers (VGPRs + Interploation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> {
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
+                            (add VGPR_64), Reg64> {
   let Size = 64;
 
   // Requires 2 v_mov_b32 to copy
@@ -776,7 +819,7 @@ def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32
   let AllocationPriority = 2;
 }
 
-def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> {
+def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> {
   let Size = 96;
 
   // Requires 3 v_mov_b32 to copy
@@ -784,7 +827,8 @@ def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> {
   let AllocationPriority = 3;
 }
 
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
+                             (add VGPR_128), Reg128> {
   let Size = 128;
 
   // Requires 4 v_mov_b32 to copy
@@ -792,7 +836,8 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VG
   let AllocationPriority = 4;
 }
 
-def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add VGPR_160)> {
+def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
+                             (add VGPR_160), Reg160> {
   let Size = 160;
 
   // Requires 5 v_mov_b32 to copy
@@ -800,32 +845,37 @@ def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add VGPR_160)> {
   let AllocationPriority = 5;
 }
 
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
+                             (add VGPR_256), Reg256> {
   let Size = 256;
   let CopyCost = 8;
   let AllocationPriority = 6;
 }
 
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+                             (add VGPR_512), Reg512> {
   let Size = 512;
   let CopyCost = 16;
   let AllocationPriority = 7;
 }
 
-def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add VGPR_1024)> {
+def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+                              (add VGPR_1024), Reg1024> {
   let Size = 1024;
   let CopyCost = 32;
   let AllocationPriority = 8;
 }
 
-def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add AGPR_64)> {
+def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32,
+                            (add AGPR_64), Reg64> {
   let Size = 64;
 
   let CopyCost = 5;
   let AllocationPriority = 2;
 }
 
-def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add AGPR_128)> {
+def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
+                             (add AGPR_128), Reg128> {
   let Size = 128;
 
   // Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr
@@ -833,38 +883,41 @@ def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add AG
   let AllocationPriority = 4;
 }
 
-def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add AGPR_512)> {
+def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
+                             (add AGPR_512), Reg512> {
   let Size = 512;
   let CopyCost = 33;
   let AllocationPriority = 7;
 }
 
-// TODO: add v32f32 value type
-def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add AGPR_1024)> {
+def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32,
+                              (add AGPR_1024), Reg1024> {
   let Size = 1024;
   let CopyCost = 65;
   let AllocationPriority = 8;
 }
 
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32), Reg32> {
   let Size = 32;
 }
 
 def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                          (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
+                          (add VGPR_32, SReg_32, LDS_DIRECT_CLASS), Reg32> {
   let isAllocatable = 0;
 }
 
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64),
+                          Reg64> {
   let isAllocatable = 0;
 }
 
 def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
-                          (add AGPR_32, VGPR_32)> {
+                          (add AGPR_32, VGPR_32), Reg32> {
   let isAllocatable = 0;
 }
 
-def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32, (add AReg_64, VReg_64)> {
+def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32,
+                          (add AReg_64, VReg_64), Reg64> {
   let isAllocatable = 0;
 }
 

From 9304e59c013122aeb8f5bad8f85a89770301b8f1 Mon Sep 17 00:00:00 2001
From: Reid Kleckner <rnk@google.com>
Date: Tue, 16 Jul 2019 23:44:33 +0000
Subject: [PATCH 308/451] Fix OpenCLCXX test on 32-bit Windows where thiscall
 is present

llvm-svn: 366284
---
 clang/test/SemaOpenCLCXX/address-space-deduction.cl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
index f66d224e25414..08668951dbca6 100644
--- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl
+++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
@@ -30,8 +30,8 @@ struct c2 {
 
 template <class T>
 struct x1 {
-//CHECK: -CXXMethodDecl {{.*}} operator= 'x1<T> &(const x1<T> &) __generic'
-//CHECK: -CXXMethodDecl {{.*}} operator= '__generic x1<int> &(const __generic x1<int> &) __generic'
+//CHECK: -CXXMethodDecl {{.*}} operator= 'x1<T> &(const x1<T> &){{( __attribute__.*)?}} __generic'
+//CHECK: -CXXMethodDecl {{.*}} operator= '__generic x1<int> &(const __generic x1<int> &){{( __attribute__.*)?}} __generic'
   x1<T>& operator=(const x1<T>& xx) {
     y = xx.y;
     return *this;
@@ -41,8 +41,8 @@ struct x1 {
 
 template <class T>
 struct x2 {
-//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1<T> *) __generic'
-//CHECK: -CXXMethodDecl {{.*}} foo 'void (__generic x1<int> *) __generic'
+//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1<T> *){{( __attribute__.*)?}} __generic'
+//CHECK: -CXXMethodDecl {{.*}} foo 'void (__generic x1<int> *){{( __attribute__.*)?}} __generic'
   void foo(x1<T>* xx) {
     m[0] = *xx;
   }
@@ -57,9 +57,9 @@ void bar(__global x1<int> *xx, __global x2<int> *bar) {
 template <typename T>
 class x3 : public T {
 public:
-  //CHECK: -CXXConstructorDecl {{.*}} x3<T> 'void (const x3<T> &) __generic'
+  //CHECK: -CXXConstructorDecl {{.*}} x3<T> 'void (const x3<T> &){{( __attribute__.*)?}} __generic'
   x3(const x3 &t);
 };
-//CHECK: -CXXConstructorDecl {{.*}} x3<T> 'void (const x3<T> &) __generic'
+//CHECK: -CXXConstructorDecl {{.*}} x3<T> 'void (const x3<T> &){{( __attribute__.*)?}} __generic'
 template <typename T>
 x3<T>::x3(const x3<T> &t) {}

From 98a48794ecdfb131c73abc7ef0dd45435863ff4b Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Tue, 16 Jul 2019 23:54:17 +0000
Subject: [PATCH 309/451] Don't require python exe and lib versions to match
 while crosscompiling

Summary:
When cross compiling, the Python executable is only used to run a handful
of scripts, whereas the Python libraries are linked against and their headers
are included. In theory the two versions could match exactly, but requiring
that (e.g. rejecting 2.7.10 alongside 2.7.15) is unnecessary.

Subscribers: mgorny

Differential Revision: https://reviews.llvm.org/D64822

llvm-svn: 366285
---
 lldb/cmake/modules/LLDBConfig.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index ad83153a18898..ccb5de568b43c 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -198,7 +198,8 @@ if (NOT LLDB_DISABLE_PYTHON)
     find_package(PythonLibs REQUIRED)
   endif()
 
-  if (NOT PYTHON_VERSION_STRING VERSION_EQUAL PYTHONLIBS_VERSION_STRING)
+  if (NOT PYTHON_VERSION_STRING VERSION_EQUAL PYTHONLIBS_VERSION_STRING AND
+      NOT CMAKE_CROSSCOMPILING)
     message(FATAL_ERROR "Found incompatible Python interpreter (${PYTHON_VERSION_STRING}) and Python libraries (${PYTHONLIBS_VERSION_STRING})")
   endif()
 

From e56865d40c1691fdd215179c827be66f0cd5bae0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 17 Jul 2019 00:01:03 +0000
Subject: [PATCH 310/451] AMDGPU: Add some missing builtins

llvm-svn: 366286
---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  | 17 +++++
 clang/lib/CodeGen/CGBuiltin.cpp               |  6 ++
 .../CodeGenOpenCL/builtins-amdgcn-gfx10.cl    | 24 +++++++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl   | 64 +++++++++++++++++++
 .../builtins-amdgcn-error-gfx10-param.cl      | 18 ++++++
 .../SemaOpenCL/builtins-amdgcn-error-gfx10.cl | 15 +++++
 6 files changed, 144 insertions(+)
 create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
 create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl
 create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index e882d3b87c666..2f8fb9000a76a 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -108,6 +108,16 @@ BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n")
 BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n")
 BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n")
 BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n")
+BUILTIN(__builtin_amdgcn_alignbit, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
+BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
 
 //===----------------------------------------------------------------------===//
 // CI+ only builtins.
@@ -162,6 +172,13 @@ TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot2-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts")
 TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot2-insts")
 
+//===----------------------------------------------------------------------===//
+// GFX10+ only builtins.
+//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
+TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts")
+TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts")
+
 //===----------------------------------------------------------------------===//
 // Special builtins.
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 1658be5a88e02..a300bab49f9c8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -12679,6 +12679,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 
   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
     return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
+  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
+    return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
   case AMDGPU::BI__builtin_amdgcn_mov_dpp:
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
     llvm::SmallVector<llvm::Value *, 6> Args;
@@ -12744,6 +12746,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
   case AMDGPU::BI__builtin_amdgcn_lerp:
     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
+  case AMDGPU::BI__builtin_amdgcn_ubfe:
+    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
+  case AMDGPU::BI__builtin_amdgcn_sbfe:
+    return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
   case AMDGPU::BI__builtin_amdgcn_uicmp:
   case AMDGPU::BI__builtin_amdgcn_uicmpl:
   case AMDGPU::BI__builtin_amdgcn_sicmp:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
new file mode 100644
index 0000000000000..3921cb90c3a57
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
@@ -0,0 +1,24 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
+
+// CHECK-LABEL: @test_permlane16(
+// CHECK: call i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true)
+void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) {
+  *out = __builtin_amdgcn_permlane16(a, b, c, d, 1, 1);
+}
+
+// CHECK-LABEL: @test_permlanex16(
+// CHECK: call i32 @llvm.amdgcn.permlanex16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true)
+void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
+  *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, 1);
+}
+
+// CHECK-LABEL: @test_mov_dpp8(
+// CHECK: call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
+void test_mov_dpp8(global uint* out, uint a) {
+  *out = __builtin_amdgcn_mov_dpp8(a, 1);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index e4c40d92266fd..bbae5ea24be0a 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -5,6 +5,10 @@
 
 typedef unsigned long ulong;
 typedef unsigned int uint;
+typedef unsigned short ushort;
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef ushort __attribute__((ext_vector_type(2))) ushort2;
 
 // CHECK-LABEL: @test_div_scale_f64
 // CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
@@ -590,6 +594,66 @@ kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) {
   *out = __builtin_amdgcn_mbcnt_hi(src0, src1);
 }
 
+// CHECK-LABEL: @test_alignbit(
+// CHECK: tail call i32 @llvm.amdgcn.alignbit(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_alignbit(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_alignbyte(
+// CHECK: tail call i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_alignbyte(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_ubfe(
+// CHECK: tail call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_ubfe(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_ubfe(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_sbfe(
+// CHECK: tail call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2)
+kernel void test_sbfe(global uint* out, uint src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_sbfe(src0, src1, src2);
+}
+
+// CHECK-LABEL: @test_cvt_pkrtz(
+// CHECK: tail call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %src0, float %src1)
+kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) {
+  *out = __builtin_amdgcn_cvt_pkrtz(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_i16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1)
+kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) {
+  *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pknorm_u16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1)
+kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) {
+  *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_i16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1)
+kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) {
+  *out = __builtin_amdgcn_cvt_pk_i16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_u16(
+// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %src0, i32 %src1)
+kernel void test_cvt_pk_u16(global ushort2* out, uint src0, uint src1) {
+  *out = __builtin_amdgcn_cvt_pk_u16(src0, src1);
+}
+
+// CHECK-LABEL: @test_cvt_pk_u8_f32
+// CHECK: tail call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src0, i32 %src1, i32 %src2)
+kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src2) {
+  *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2);
+}
+
 // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
 // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
 // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent }
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl
new file mode 100644
index 0000000000000..75d9cd3831c55
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
+
+typedef unsigned int uint;
+
+
+void test_permlane16(global uint* out, uint a, uint b, uint c, uint d, uint e) {
+  *out = __builtin_amdgcn_permlane16(a, b, c, d, e, 1); // expected-error{{argument to '__builtin_amdgcn_permlane16' must be a constant integer}}
+  *out = __builtin_amdgcn_permlane16(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlane16' must be a constant integer}}
+}
+
+void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d, uint e) {
+  *out = __builtin_amdgcn_permlanex16(a, b, c, d, e, 1); // expected-error{{argument to '__builtin_amdgcn_permlanex16' must be a constant integer}}
+  *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlanex16' must be a constant integer}}
+}
+
+void test_mov_dpp8(global uint* out, uint a, uint b) {
+  *out = __builtin_amdgcn_mov_dpp8(a, b); // expected-error{{argument to '__builtin_amdgcn_mov_dpp8' must be a constant integer}}
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
new file mode 100644
index 0000000000000..02c8dc8c1339e
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl
@@ -0,0 +1,15 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu hawaii -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu fiji -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx908 -verify -S -o - %s
+
+typedef unsigned int uint;
+
+
+void test(global uint* out, uint a, uint b, uint c, uint d) {
+  *out = __builtin_amdgcn_permlane16(a, b, c, d, 1, 1); // expected-error {{'__builtin_amdgcn_permlane16' needs target feature gfx10-insts}}
+  *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, 1);  // expected-error {{'__builtin_amdgcn_permlanex16' needs target feature gfx10-insts}}
+  *out = __builtin_amdgcn_mov_dpp8(a, 1);  // expected-error {{'__builtin_amdgcn_mov_dpp8' needs target feature gfx10-insts}}
+}

From 420f3f6444ac393a3c65382d4c9a7b4f2bb23ed4 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Wed, 17 Jul 2019 01:19:30 +0000
Subject: [PATCH 311/451] gn build: Merge r366216

llvm-svn: 366288
---
 llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn
index 59d15041a5268..19510c1629d36 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn
@@ -6,6 +6,7 @@ static_library("Remarks") {
 
   sources = [
     "Remark.cpp",
+    "RemarkFormat.cpp",
     "RemarkParser.cpp",
     "RemarkStringTable.cpp",
     "YAMLRemarkParser.cpp",

From 67cf3d61ee3c12595afa264c6c079382cb33f8de Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Wed, 17 Jul 2019 01:21:14 +0000
Subject: [PATCH 312/451] gn build: Merge r366265

llvm-svn: 366289
---
 .../secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
index 2bd2a69b4e6ae..b82db708cc894 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
@@ -16,6 +16,7 @@ static_library("readability") {
     "BracesAroundStatementsCheck.cpp",
     "ConstReturnTypeCheck.cpp",
     "ContainerSizeEmptyCheck.cpp",
+    "ConvertMemberFunctionsToStatic.cpp",
     "DeleteNullPointerCheck.cpp",
     "DeletedDefaultCheck.cpp",
     "ElseAfterReturnCheck.cpp",

From 4c77a696ae4d42d791b7443ce387d9f42197e10d Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucf@cn.ibm.com>
Date: Wed, 17 Jul 2019 07:02:02 +0000
Subject: [PATCH 313/451] Update email address.

llvm-svn: 366291
---
 llvm/CREDITS.TXT | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/CREDITS.TXT b/llvm/CREDITS.TXT
index dab633c7e3b11..e998fa6bb257d 100644
--- a/llvm/CREDITS.TXT
+++ b/llvm/CREDITS.TXT
@@ -412,6 +412,10 @@ W: http://vladimir_prus.blogspot.com
 E: ghost@cs.msu.su
 D: Made inst_iterator behave like a proper iterator, LowerConstantExprs pass
 
+N: QIU Chaofan
+E: qiucofan@cn.ibm.com
+D: PowerPC Backend Developer
+
 N: Kalle Raiskila
 E: kalle.rasikila@nokia.com
 D: Some bugfixes to CellSPU
@@ -530,10 +534,6 @@ N: Zheng Chen
 E: czhengsz@cn.ibm.com
 D: PowerPC Backend Developer
 
-N: Qiu Chaofan
-E: qiucf@cn.ibm.com
-D: PowerPC Backend Developer
-
 N: Djordje Todorovic
 E: djordje.todorovic@rt-rk.com
 D: Debug Information

From e574f8b3d8910205ff89cf3562088ef50f384638 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Wed, 17 Jul 2019 07:03:17 +0000
Subject: [PATCH 314/451] [Target][NFCI] Rename variable

This variable doesn't have anything to do with clang.

llvm-svn: 366292
---
 lldb/source/Target/ABI.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lldb/source/Target/ABI.cpp b/lldb/source/Target/ABI.cpp
index d71cee7f58054..28cd9aec665cb 100644
--- a/lldb/source/Target/ABI.cpp
+++ b/lldb/source/Target/ABI.cpp
@@ -124,11 +124,11 @@ ValueObjectSP ABI::GetReturnValueObject(Thread &thread, CompilerType &ast_type,
 
     return_valobj_sp = const_valobj_sp;
 
-    ExpressionVariableSP clang_expr_variable_sp(
+    ExpressionVariableSP expr_variable_sp(
         persistent_expression_state->CreatePersistentVariable(
             return_valobj_sp));
 
-    assert(clang_expr_variable_sp);
+    assert(expr_variable_sp);
 
     // Set flags and live data as appropriate
 
@@ -141,21 +141,21 @@ ValueObjectSP ABI::GetReturnValueObject(Thread &thread, CompilerType &ast_type,
       break;
     case Value::eValueTypeScalar:
     case Value::eValueTypeVector:
-      clang_expr_variable_sp->m_flags |=
+      expr_variable_sp->m_flags |=
           ExpressionVariable::EVIsFreezeDried;
-      clang_expr_variable_sp->m_flags |=
+      expr_variable_sp->m_flags |=
           ExpressionVariable::EVIsLLDBAllocated;
-      clang_expr_variable_sp->m_flags |=
+      expr_variable_sp->m_flags |=
           ExpressionVariable::EVNeedsAllocation;
       break;
     case Value::eValueTypeLoadAddress:
-      clang_expr_variable_sp->m_live_sp = live_valobj_sp;
-      clang_expr_variable_sp->m_flags |=
+      expr_variable_sp->m_live_sp = live_valobj_sp;
+      expr_variable_sp->m_flags |=
           ExpressionVariable::EVIsProgramReference;
       break;
     }
 
-    return_valobj_sp = clang_expr_variable_sp->GetValueObject();
+    return_valobj_sp = expr_variable_sp->GetValueObject();
   }
   return return_valobj_sp;
 }

From 06bf5d853881420757146602521b288afd943f3c Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Wed, 17 Jul 2019 07:05:49 +0000
Subject: [PATCH 315/451] [NativePDB] Make GetTranslationUnitDecl return an
 lldb CompilerDeclCtx

Summary:
We intend to make PdbAstBuilder abstract and implement
PdbAstBuilderClang along with any other languages that wish to use
PDBs. This is the first step.

Differential Revision: https://reviews.llvm.org/D64852

llvm-svn: 366293
---
 .../SymbolFile/NativePDB/PdbAstBuilder.cpp    | 19 ++++++++++---------
 .../SymbolFile/NativePDB/PdbAstBuilder.h      |  2 +-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
index e8fd59c7b7453..eb8da2a51b6cb 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
@@ -214,8 +214,8 @@ PdbAstBuilder::PdbAstBuilder(ObjectFile &obj, PdbIndex &index)
   BuildParentMap();
 }
 
-clang::DeclContext &PdbAstBuilder::GetTranslationUnitDecl() {
-  return *m_clang.GetTranslationUnitDecl();
+lldb_private::CompilerDeclContext PdbAstBuilder::GetTranslationUnitDecl() {
+  return ToCompilerDeclContext(*m_clang.GetTranslationUnitDecl());
 }
 
 std::pair<clang::DeclContext *, std::string>
@@ -492,7 +492,7 @@ clang::Decl *PdbAstBuilder::GetOrCreateDeclForUid(PdbSymUid uid) {
 clang::DeclContext *PdbAstBuilder::GetOrCreateDeclContextForUid(PdbSymUid uid) {
   if (uid.kind() == PdbSymUidKind::CompilandSym) {
     if (uid.asCompilandSym().offset == 0)
-      return &GetTranslationUnitDecl();
+      return FromCompilerDeclContext(GetTranslationUnitDecl());
   }
 
   clang::Decl *decl = GetOrCreateDeclForUid(uid);
@@ -507,7 +507,7 @@ PdbAstBuilder::CreateDeclInfoForUndecoratedName(llvm::StringRef name) {
   MSVCUndecoratedNameParser parser(name);
   llvm::ArrayRef<MSVCUndecoratedNameSpecifier> specs = parser.GetSpecifiers();
 
-  clang::DeclContext *context = &GetTranslationUnitDecl();
+  auto context = FromCompilerDeclContext(GetTranslationUnitDecl());
 
   llvm::StringRef uname = specs.back().GetBaseName();
   specs = specs.drop_back();
@@ -548,7 +548,7 @@ PdbAstBuilder::GetParentDeclContextForSymbol(const CVSymbol &sym) {
   StringView name{pub->Name.begin(), pub->Name.size()};
   llvm::ms_demangle::SymbolNode *node = demangler.parse(name);
   if (!node)
-    return &GetTranslationUnitDecl();
+    return FromCompilerDeclContext(GetTranslationUnitDecl());
   llvm::ArrayRef<llvm::ms_demangle::Node *> name_components{
       node->Name->Components->Nodes, node->Name->Components->Count - 1};
 
@@ -569,7 +569,7 @@ PdbAstBuilder::GetParentDeclContextForSymbol(const CVSymbol &sym) {
   }
 
   // It's not a type.  It must be a series of namespaces.
-  clang::DeclContext *context = &GetTranslationUnitDecl();
+  auto context = FromCompilerDeclContext(GetTranslationUnitDecl());
   while (!name_components.empty()) {
     std::string ns = name_components.front()->toString();
     context = GetOrCreateNamespaceDecl(ns.c_str(), *context);
@@ -597,7 +597,7 @@ clang::DeclContext *PdbAstBuilder::GetParentDeclContext(PdbSymUid uid) {
     PdbTypeSymId type_id = uid.asTypeSym();
     auto iter = m_parent_types.find(type_id.index);
     if (iter == m_parent_types.end())
-      return &GetTranslationUnitDecl();
+      return FromCompilerDeclContext(GetTranslationUnitDecl());
     return GetOrCreateDeclContextForUid(PdbTypeSymId(iter->second));
   }
   case PdbSymUidKind::FieldListMember:
@@ -635,7 +635,7 @@ clang::DeclContext *PdbAstBuilder::GetParentDeclContext(PdbSymUid uid) {
   default:
     break;
   }
-  return &GetTranslationUnitDecl();
+  return FromCompilerDeclContext(GetTranslationUnitDecl());
 }
 
 bool PdbAstBuilder::CompleteType(clang::QualType qt) {
@@ -866,7 +866,8 @@ clang::VarDecl *PdbAstBuilder::GetOrCreateVariableDecl(PdbGlobalSymId var_id) {
     return llvm::dyn_cast<clang::VarDecl>(decl);
 
   CVSymbol sym = m_index.ReadSymbolRecord(var_id);
-  return CreateVariableDecl(PdbSymUid(var_id), sym, GetTranslationUnitDecl());
+  auto context = FromCompilerDeclContext(GetTranslationUnitDecl());
+  return CreateVariableDecl(PdbSymUid(var_id), sym, *context);
 }
 
 clang::TypedefNameDecl *
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h
index e4241594845a9..8331abeaf47da 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h
@@ -53,7 +53,7 @@ class PdbAstBuilder {
   // Constructors and Destructors
   PdbAstBuilder(ObjectFile &obj, PdbIndex &index);
 
-  clang::DeclContext &GetTranslationUnitDecl();
+  lldb_private::CompilerDeclContext GetTranslationUnitDecl();
 
   clang::Decl *GetOrCreateDeclForUid(PdbSymUid uid);
   clang::DeclContext *GetOrCreateDeclContextForUid(PdbSymUid uid);

From 587d13d4c23500f517a99b8c0517e5e2b5aa0889 Mon Sep 17 00:00:00 2001
From: Serguei Katkov <serguei.katkov@azul.com>
Date: Wed, 17 Jul 2019 07:09:20 +0000
Subject: [PATCH 316/451] [LoopInfo] Fix getUniqueNonLatchExitBlocks

It is possible for an exit block to have two predecessors, one of which is the
latch block while the other is not.

The current algorithm is based on the assumption that all exits are dedicated,
and therefore only the first predecessor of a loop exit needs to be checked to
find all unique exits.

However, if the latch block is not considered and it is the first predecessor of
some exit, then this exit will not be found.

Regression test is added.

As a side effect of algorithm re-writing, the restriction that all exits are dedicated
is eliminated.

Reviewers: reames, fhahn, efriedma
Reviewed By: efriedma
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D64787

llvm-svn: 366294
---
 llvm/include/llvm/Analysis/LoopInfo.h     |  4 --
 llvm/include/llvm/Analysis/LoopInfoImpl.h | 47 ++++-------------------
 llvm/unittests/Analysis/LoopInfoTest.cpp  | 43 +++++++++++++++++++++
 3 files changed, 50 insertions(+), 44 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index f8f0a0c339d58..584eb3a8c854c 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -270,16 +270,12 @@ template <class BlockT, class LoopT> class LoopBase {
 
   /// Return all unique successor blocks of this loop.
   /// These are the blocks _outside of the current loop_ which are branched to.
-  /// This assumes that loop exits are in canonical form, i.e. all exits are
-  /// dedicated exits.
   void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
 
   /// Return all unique successor blocks of this loop except successors from
   /// Latch block are not considered. If the exit comes from Latch has also
   /// non Latch predecessor in a loop it will be added to ExitBlocks.
   /// These are the blocks _outside of the current loop_ which are branched to.
-  /// This assumes that loop exits are in canonical form, i.e. all exits are
-  /// dedicated exits.
   void getUniqueNonLatchExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
 
   /// If getUniqueExitBlocks would return exactly one block, return that block.
diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h
index c9f690dac65eb..4c33dac9e21e1 100644
--- a/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -101,47 +101,14 @@ template <class BlockT, class LoopT, typename PredicateT>
 void getUniqueExitBlocksHelper(const LoopT *L,
                                SmallVectorImpl<BlockT *> &ExitBlocks,
                                PredicateT Pred) {
-  typedef GraphTraits<BlockT *> BlockTraits;
-  typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits;
-
-  assert(L->hasDedicatedExits() &&
-         "getUniqueExitBlocks assumes the loop has canonical form exits!");
-
-  SmallVector<BlockT *, 32> SwitchExitBlocks;
+  assert(!L->isInvalid() && "Loop not in a valid state!");
+  SmallPtrSet<BlockT *, 32> Visited;
   auto Filtered = make_filter_range(L->blocks(), Pred);
-  for (BlockT *Block : Filtered) {
-    SwitchExitBlocks.clear();
-    for (BlockT *Successor : children<BlockT *>(Block)) {
-      // If block is inside the loop then it is not an exit block.
-      if (L->contains(Successor))
-        continue;
-
-      BlockT *FirstPred = *InvBlockTraits::child_begin(Successor);
-
-      // If current basic block is this exit block's first predecessor then only
-      // insert exit block in to the output ExitBlocks vector. This ensures that
-      // same exit block is not inserted twice into ExitBlocks vector.
-      if (Block != FirstPred)
-        continue;
-
-      // If a terminator has more then two successors, for example SwitchInst,
-      // then it is possible that there are multiple edges from current block to
-      // one exit block.
-      if (std::distance(BlockTraits::child_begin(Block),
-                        BlockTraits::child_end(Block)) <= 2) {
-        ExitBlocks.push_back(Successor);
-        continue;
-      }
-
-      // In case of multiple edges from current block to exit block, collect
-      // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
-      // duplicate edges.
-      if (!is_contained(SwitchExitBlocks, Successor)) {
-        SwitchExitBlocks.push_back(Successor);
-        ExitBlocks.push_back(Successor);
-      }
-    }
-  }
+  for (BlockT *BB : Filtered)
+    for (BlockT *Successor : children<BlockT *>(BB))
+      if (!L->contains(Successor))
+        if (Visited.insert(Successor).second)
+          ExitBlocks.push_back(Successor);
 }
 
 template <class BlockT, class LoopT>
diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp
index 953a72aee8e36..4f0047f2fd7fc 100644
--- a/llvm/unittests/Analysis/LoopInfoTest.cpp
+++ b/llvm/unittests/Analysis/LoopInfoTest.cpp
@@ -1156,3 +1156,46 @@ TEST(LoopInfoTest, LoopUniqueExitBlocks) {
     EXPECT_TRUE(Exits.size() == 1);
   });
 }
+
+// Regression test for the getUniqueNonLatchExitBlocks function.
+// It should detect the exit if it comes from both latch and non-latch blocks.
+TEST(LoopInfoTest, LoopNonLatchUniqueExitBlocks) {
+  const char *ModuleStr =
+      "target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
+      "define void @foo(i32 %n, i1 %cond) {\n"
+      "entry:\n"
+      "  br label %for.cond\n"
+      "for.cond:\n"
+      "  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]\n"
+      "  %cmp = icmp slt i32 %i.0, %n\n"
+      "  br i1 %cond, label %for.inc, label %for.end\n"
+      "for.inc:\n"
+      "  %inc = add nsw i32 %i.0, 1\n"
+      "  br i1 %cmp, label %for.cond, label %for.end, !llvm.loop !0\n"
+      "for.end:\n"
+      "  ret void\n"
+      "}\n"
+      "!0 = distinct !{!0, !1}\n"
+      "!1 = !{!\"llvm.loop.distribute.enable\", i1 true}\n";
+
+  // Parse the module.
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleStr);
+
+  runWithLoopInfo(*M, "foo", [&](Function &F, LoopInfo &LI) {
+    Function::iterator FI = F.begin();
+    // First basic block is entry - skip it.
+    BasicBlock *Header = &*(++FI);
+    assert(Header->getName() == "for.cond");
+    Loop *L = LI.getLoopFor(Header);
+
+    SmallVector<BasicBlock *, 2> Exits;
+    // This loop has 1 unique exit.
+    L->getUniqueExitBlocks(Exits);
+    EXPECT_TRUE(Exits.size() == 1);
+    // And one unique non latch exit.
+    Exits.clear();
+    L->getUniqueNonLatchExitBlocks(Exits);
+    EXPECT_TRUE(Exits.size() == 1);
+  });
+}

From fc1c8f5d7d47139492ed9d6ef0df3c47eb1fbfd4 Mon Sep 17 00:00:00 2001
From: Alex Langford <apl@fb.com>
Date: Wed, 17 Jul 2019 07:13:42 +0000
Subject: [PATCH 317/451] [Target][NFCI] Remove commented out code

llvm-svn: 366295
---
 lldb/source/Target/ThreadPlanTracer.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lldb/source/Target/ThreadPlanTracer.cpp b/lldb/source/Target/ThreadPlanTracer.cpp
index 129c0896d491c..4e79b6b1e59d0 100644
--- a/lldb/source/Target/ThreadPlanTracer.cpp
+++ b/lldb/source/Target/ThreadPlanTracer.cpp
@@ -187,8 +187,6 @@ void ThreadPlanAssemblyTracer::Log() {
     for (int arg_index = 0; arg_index < num_args; ++arg_index) {
       Value value;
       value.SetValueType(Value::eValueTypeScalar);
-      //            value.SetContext (Value::eContextTypeClangType,
-      //            intptr_type.GetOpaqueQualType());
       value.SetCompilerType(intptr_type);
       value_list.PushValue(value);
     }

From 7f308af5eeea2d1b24aee0361d39dc43bac4cfe5 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 17 Jul 2019 08:11:15 +0000
Subject: [PATCH 318/451] [mips] Support the "o" inline asm constraint

Like other LLVM targets, we do not handle "offsettable" memory addresses
in any special way. In other words, the "o" constraint is an exact
equivalent of the "m" one. However, some existing code requires support
for the "o" constraint.

This fixes PR42589.

Differential Revision: https://reviews.llvm.org/D64792

llvm-svn: 366299
---
 llvm/lib/Target/Mips/MipsISelLowering.h       |  2 +
 llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp   |  1 +
 .../CodeGen/Mips/inlineasm_constraint_o.ll    | 61 +++++++++++++++++++
 3 files changed, 64 insertions(+)
 create mode 100644 llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll

diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 27bf18c8ba90d..2db60e9801f1b 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -653,6 +653,8 @@ class TargetRegisterClass;
 
     unsigned
     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
+      if (ConstraintCode == "o")
+        return InlineAsm::Constraint_o;
       if (ConstraintCode == "R")
         return InlineAsm::Constraint_R;
       if (ConstraintCode == "ZC")
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index c50e4c215a4df..703f99f37dd1b 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -1237,6 +1237,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
     OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
     return false;
   case InlineAsm::Constraint_m:
+  case InlineAsm::Constraint_o:
     if (selectAddrRegImm16(Op, Base, Offset)) {
       OutOps.push_back(Base);
       OutOps.push_back(Offset);
diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll b/llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll
new file mode 100644
index 0000000000000..de677cbcc6813
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s
+
+@data = global [8193 x i32] zeroinitializer
+
+define void @o(i32 *%p) nounwind {
+entry:
+  ; CHECK-LABEL: o:
+
+  call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0))
+
+  ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)(
+  ; CHECK: #APP
+  ; CHECK: lw $1, 0($[[BASEPTR]])
+  ; CHECK: #NO_APP
+
+  ret void
+}
+
+define void @o_offset_4(i32 *%p) nounwind {
+entry:
+  ; CHECK-LABEL: o_offset_4:
+
+  call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1))
+
+  ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)(
+  ; CHECK: #APP
+  ; CHECK: lw $1, 4($[[BASEPTR]])
+  ; CHECK: #NO_APP
+
+  ret void
+}
+
+define void @o_offset_32764(i32 *%p) nounwind {
+entry:
+  ; CHECK-LABEL: o_offset_32764:
+
+  call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191))
+
+  ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+  ; CHECK: #APP
+  ; CHECK: lw $1, 32764($[[BASEPTR]])
+  ; CHECK: #NO_APP
+
+  ret void
+}
+
+define void @o_offset_32768(i32 *%p) nounwind {
+entry:
+  ; CHECK-LABEL: o_offset_32768:
+
+  call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192))
+
+  ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+  ; CHECK-DAG: ori $[[T0:[0-9]+]], $zero, 32768
+  ; CHECK: addu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], $[[T0]]
+  ; CHECK: #APP
+  ; CHECK: lw $1, 0($[[BASEPTR2]])
+  ; CHECK: #NO_APP
+
+  ret void
+}

From a884afb6f8ea18f89de767f56f54b43c93fa8b85 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 17 Jul 2019 08:11:31 +0000
Subject: [PATCH 319/451] [mips] Implement .cplocal directive

This directive forces the use of an alternate register as the context pointer.
For example, this code:
  .cplocal $4
  jal foo
expands to:
  ld    $25, %call16(foo)($4)
  jalr  $25

Differential Revision: https://reviews.llvm.org/D64743

llvm-svn: 366300
---
 .../Target/Mips/AsmParser/MipsAsmParser.cpp   | 67 +++++++++++++++----
 .../Mips/MCTargetDesc/MipsTargetStreamer.cpp  | 67 +++++++++++++------
 llvm/lib/Target/Mips/MipsTargetStreamer.h     |  4 ++
 llvm/test/MC/Mips/cplocal-bad.s               | 20 ++++++
 llvm/test/MC/Mips/cplocal.s                   | 45 +++++++++++++
 5 files changed, 170 insertions(+), 33 deletions(-)
 create mode 100644 llvm/test/MC/Mips/cplocal-bad.s
 create mode 100644 llvm/test/MC/Mips/cplocal.s

diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 0d968674faa85..1f7d095bf49b5 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -146,6 +146,7 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool IsPicEnabled;
   bool IsCpRestoreSet;
   int CpRestoreOffset;
+  unsigned GPReg;
   unsigned CpSaveLocation;
   /// If true, then CpSaveLocation is a register, otherwise it's an offset.
   bool     CpSaveLocationIsRegister;
@@ -334,6 +335,7 @@ class MipsAsmParser : public MCTargetAsmParser {
   bool parseSetFeature(uint64_t Feature);
   bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup.
   bool parseDirectiveCpLoad(SMLoc Loc);
+  bool parseDirectiveCpLocal(SMLoc Loc);
   bool parseDirectiveCpRestore(SMLoc Loc);
   bool parseDirectiveCPSetup();
   bool parseDirectiveCPReturn();
@@ -527,6 +529,7 @@ class MipsAsmParser : public MCTargetAsmParser {
 
     IsCpRestoreSet = false;
     CpRestoreOffset = -1;
+    GPReg = ABI.GetGlobalPtr();
 
     const Triple &TheTriple = sti.getTargetTriple();
     IsLittleEndian = TheTriple.isLittleEndian();
@@ -2040,7 +2043,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
         const MCExpr *Lo16RelocExpr =
             MipsMCExpr::create(MipsMCExpr::MEK_LO, JalExpr, getContext());
 
-        TOut.emitRRX(Mips::LW, Mips::T9, Mips::GP,
+        TOut.emitRRX(Mips::LW, Mips::T9, GPReg,
                      MCOperand::createExpr(Got16RelocExpr), IDLoc, STI);
         TOut.emitRRX(Mips::ADDiu, Mips::T9, Mips::T9,
                      MCOperand::createExpr(Lo16RelocExpr), IDLoc, STI);
@@ -2054,7 +2057,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
             MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, JalExpr, getContext());
 
         TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9,
-                     Mips::GP, MCOperand::createExpr(GotDispRelocExpr), IDLoc,
+                     GPReg, MCOperand::createExpr(GotDispRelocExpr), IDLoc,
                      STI);
       }
     } else {
@@ -2065,7 +2068,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
       const MCExpr *Call16RelocExpr =
           MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, JalExpr, getContext());
 
-      TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP,
+      TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, GPReg,
                    MCOperand::createExpr(Call16RelocExpr), IDLoc, STI);
     }
 
@@ -2893,8 +2896,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
                ELF::STB_LOCAL))) {
       const MCExpr *CallExpr =
           MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
-      TOut.emitRRX(Mips::LW, DstReg, ABI.GetGlobalPtr(),
-                   MCOperand::createExpr(CallExpr), IDLoc, STI);
+      TOut.emitRRX(Mips::LW, DstReg, GPReg, MCOperand::createExpr(CallExpr),
+                   IDLoc, STI);
       return false;
     }
 
@@ -2933,8 +2936,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
       TmpReg = ATReg;
     }
 
-    TOut.emitRRX(Mips::LW, TmpReg, ABI.GetGlobalPtr(),
-                 MCOperand::createExpr(GotExpr), IDLoc, STI);
+    TOut.emitRRX(Mips::LW, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc,
+                 STI);
 
     if (LoExpr)
       TOut.emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
@@ -2969,8 +2972,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
                ELF::STB_LOCAL))) {
       const MCExpr *CallExpr =
           MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext());
-      TOut.emitRRX(Mips::LD, DstReg, ABI.GetGlobalPtr(),
-                   MCOperand::createExpr(CallExpr), IDLoc, STI);
+      TOut.emitRRX(Mips::LD, DstReg, GPReg, MCOperand::createExpr(CallExpr),
+                   IDLoc, STI);
       return false;
     }
 
@@ -3012,8 +3015,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
       TmpReg = ATReg;
     }
 
-    TOut.emitRRX(Mips::LD, TmpReg, ABI.GetGlobalPtr(),
-                 MCOperand::createExpr(GotExpr), IDLoc, STI);
+    TOut.emitRRX(Mips::LD, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc,
+                 STI);
 
     if (LoExpr)
       TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr),
@@ -3243,10 +3246,10 @@ bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc,
         MipsMCExpr::create(MipsMCExpr::MEK_GOT, GotSym, getContext());
 
     if(isABI_O32() || isABI_N32()) {
-      TOut.emitRRX(Mips::LW, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+      TOut.emitRRX(Mips::LW, ATReg, GPReg, MCOperand::createExpr(GotExpr),
                    IDLoc, STI);
     } else { //isABI_N64()
-      TOut.emitRRX(Mips::LD, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+      TOut.emitRRX(Mips::LD, ATReg, GPReg, MCOperand::createExpr(GotExpr),
                    IDLoc, STI);
     }
   } else { //!IsPicEnabled
@@ -7241,6 +7244,40 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
   return false;
 }
 
+bool MipsAsmParser::parseDirectiveCpLocal(SMLoc Loc) {
+  if (!isABI_N32() && !isABI_N64()) {
+    reportParseError(".cplocal is allowed only in N32 or N64 mode");
+    return false;
+  }
+
+  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
+  OperandMatchResultTy ResTy = parseAnyRegister(Reg);
+  if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+    reportParseError("expected register containing global pointer");
+    return false;
+  }
+
+  MipsOperand &RegOpnd = static_cast<MipsOperand &>(*Reg[0]);
+  if (!RegOpnd.isGPRAsmReg()) {
+    reportParseError(RegOpnd.getStartLoc(), "invalid register");
+    return false;
+  }
+
+  // If this is not the end of the statement, report an error.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    reportParseError("unexpected token, expected end of statement");
+    return false;
+  }
+  getParser().Lex(); // Consume the EndOfStatement.
+
+  unsigned NewReg = RegOpnd.getGPR32Reg();
+  if (IsPicEnabled)
+    GPReg = NewReg;
+
+  getTargetStreamer().emitDirectiveCpLocal(NewReg);
+  return false;
+}
+
 bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) {
   MCAsmParser &Parser = getParser();
 
@@ -8091,6 +8128,10 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
     parseDirectiveCpRestore(DirectiveID.getLoc());
     return false;
   }
+  if (IDVal == ".cplocal") {
+    parseDirectiveCpLocal(DirectiveID.getLoc());
+    return false;
+  }
   if (IDVal == ".ent") {
     StringRef SymbolName;
 
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index bd978e9e6ae58..e3bdb3b140a8f 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -35,7 +35,7 @@ static cl::opt<bool> RoundSectionSizes(
 } // end anonymous namespace
 
 MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
-    : MCTargetStreamer(S), ModuleDirectiveAllowed(true) {
+    : MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) {
   GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
 }
 void MipsTargetStreamer::emitDirectiveSetMicroMips() {}
@@ -106,6 +106,23 @@ void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
 void MipsTargetStreamer::emitDirectiveSetDspr2() { forbidModuleDirective(); }
 void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
 void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+  // .cplocal $reg
+  // This directive forces to use the alternate register for context pointer.
+  // For example
+  //   .cplocal $4
+  //   jal foo
+  // expands to
+  //   ld    $25, %call16(foo)($4)
+  //   jalr  $25
+
+  if (!getABI().IsN32() && !getABI().IsN64())
+    return;
+
+  GPReg = RegNo;
+
+  forbidModuleDirective();
+}
 bool MipsTargetStreamer::emitDirectiveCpRestore(
     int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
     const MCSubtargetInfo *STI) {
@@ -257,8 +274,7 @@ void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {
 /// Emit the $gp restore operation for .cprestore.
 void MipsTargetStreamer::emitGPRestore(int Offset, SMLoc IDLoc,
                                        const MCSubtargetInfo *STI) {
-  emitLoadWithImmOffset(Mips::LW, Mips::GP, Mips::SP, Offset, Mips::GP, IDLoc,
-                        STI);
+  emitLoadWithImmOffset(Mips::LW, GPReg, Mips::SP, Offset, GPReg, IDLoc, STI);
 }
 
 /// Emit a store instruction with an immediate offset.
@@ -665,6 +681,12 @@ void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) {
   forbidModuleDirective();
 }
 
+void MipsTargetAsmStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+  OS << "\t.cplocal\t$"
+     << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
+  MipsTargetStreamer::emitDirectiveCpLocal(RegNo);
+}
+
 bool MipsTargetAsmStreamer::emitDirectiveCpRestore(
     int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
     const MCSubtargetInfo *STI) {
@@ -1135,7 +1157,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
 
   MCInst TmpInst;
   TmpInst.setOpcode(Mips::LUi);
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
   const MCExpr *HiSym = MipsMCExpr::create(
       MipsMCExpr::MEK_HI,
       MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None,
@@ -1147,8 +1169,8 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
   TmpInst.clear();
 
   TmpInst.setOpcode(Mips::ADDiu);
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
   const MCExpr *LoSym = MipsMCExpr::create(
       MipsMCExpr::MEK_LO,
       MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None,
@@ -1160,14 +1182,19 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
   TmpInst.clear();
 
   TmpInst.setOpcode(Mips::ADDu);
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
-  TmpInst.addOperand(MCOperand::createReg(Mips::GP));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
+  TmpInst.addOperand(MCOperand::createReg(GPReg));
   TmpInst.addOperand(MCOperand::createReg(RegNo));
   getStreamer().EmitInstruction(TmpInst, STI);
 
   forbidModuleDirective();
 }
 
+void MipsTargetELFStreamer::emitDirectiveCpLocal(unsigned RegNo) {
+  if (Pic)
+    MipsTargetStreamer::emitDirectiveCpLocal(RegNo);
+}
+
 bool MipsTargetELFStreamer::emitDirectiveCpRestore(
     int Offset, function_ref<unsigned()> GetATReg, SMLoc IDLoc,
     const MCSubtargetInfo *STI) {
@@ -1184,7 +1211,7 @@ bool MipsTargetELFStreamer::emitDirectiveCpRestore(
     return true;
 
   // Store the $gp on the stack.
-  emitStoreWithImmOffset(Mips::SW, Mips::GP, Mips::SP, Offset, GetATReg, IDLoc,
+  emitStoreWithImmOffset(Mips::SW, GPReg, Mips::SP, Offset, GetATReg, IDLoc,
                          STI);
   return true;
 }
@@ -1205,10 +1232,10 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
   // Either store the old $gp in a register or on the stack
   if (IsReg) {
     // move $save, $gpreg
-    emitRRR(Mips::OR64, RegOrOffset, Mips::GP, Mips::ZERO, SMLoc(), &STI);
+    emitRRR(Mips::OR64, RegOrOffset, GPReg, Mips::ZERO, SMLoc(), &STI);
   } else {
     // sd $gpreg, offset($sp)
-    emitRRI(Mips::SD, Mips::GP, Mips::SP, RegOrOffset, SMLoc(), &STI);
+    emitRRI(Mips::SD, GPReg, Mips::SP, RegOrOffset, SMLoc(), &STI);
   }
 
   if (getABI().IsN32()) {
@@ -1221,11 +1248,11 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
         MCA.getContext());
 
     // lui $gp, %hi(__gnu_local_gp)
-    emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
+    emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
 
     // addiu  $gp, $gp, %lo(__gnu_local_gp)
-    emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr),
-            SMLoc(), &STI);
+    emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(),
+            &STI);
 
     return;
   }
@@ -1238,14 +1265,14 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
       MCA.getContext());
 
   // lui $gp, %hi(%neg(%gp_rel(funcSym)))
-  emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
+  emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI);
 
   // addiu  $gp, $gp, %lo(%neg(%gp_rel(funcSym)))
-  emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr),
-          SMLoc(), &STI);
+  emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(),
+          &STI);
 
   // daddu  $gp, $gp, $funcreg
-  emitRRR(Mips::DADDu, Mips::GP, Mips::GP, RegNo, SMLoc(), &STI);
+  emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI);
 }
 
 void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
@@ -1258,12 +1285,12 @@ void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
   // Either restore the old $gp from a register or on the stack
   if (SaveLocationIsRegister) {
     Inst.setOpcode(Mips::OR);
-    Inst.addOperand(MCOperand::createReg(Mips::GP));
+    Inst.addOperand(MCOperand::createReg(GPReg));
     Inst.addOperand(MCOperand::createReg(SaveLocation));
     Inst.addOperand(MCOperand::createReg(Mips::ZERO));
   } else {
     Inst.setOpcode(Mips::LD);
-    Inst.addOperand(MCOperand::createReg(Mips::GP));
+    Inst.addOperand(MCOperand::createReg(GPReg));
     Inst.addOperand(MCOperand::createReg(Mips::SP));
     Inst.addOperand(MCOperand::createImm(SaveLocation));
   }
diff --git a/llvm/lib/Target/Mips/MipsTargetStreamer.h b/llvm/lib/Target/Mips/MipsTargetStreamer.h
index 697a5c4193ea2..1fa8ebadd6435 100644
--- a/llvm/lib/Target/Mips/MipsTargetStreamer.h
+++ b/llvm/lib/Target/Mips/MipsTargetStreamer.h
@@ -91,6 +91,7 @@ class MipsTargetStreamer : public MCTargetStreamer {
 
   // PIC support
   virtual void emitDirectiveCpLoad(unsigned RegNo);
+  virtual void emitDirectiveCpLocal(unsigned RegNo);
   virtual bool emitDirectiveCpRestore(int Offset,
                                       function_ref<unsigned()> GetATReg,
                                       SMLoc IDLoc, const MCSubtargetInfo *STI);
@@ -199,6 +200,7 @@ class MipsTargetStreamer : public MCTargetStreamer {
   bool FrameInfoSet;
   int FrameOffset;
   unsigned FrameReg;
+  unsigned GPReg;
   unsigned ReturnReg;
 
 private:
@@ -274,6 +276,7 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer {
 
   // PIC support
   void emitDirectiveCpLoad(unsigned RegNo) override;
+  void emitDirectiveCpLocal(unsigned RegNo) override;
 
   /// Emit a .cprestore directive.  If the offset is out of range then it will
   /// be synthesized using the assembler temporary.
@@ -345,6 +348,7 @@ class MipsTargetELFStreamer : public MipsTargetStreamer {
 
   // PIC support
   void emitDirectiveCpLoad(unsigned RegNo) override;
+  void emitDirectiveCpLocal(unsigned RegNo) override;
   bool emitDirectiveCpRestore(int Offset, function_ref<unsigned()> GetATReg,
                               SMLoc IDLoc, const MCSubtargetInfo *STI) override;
   void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
diff --git a/llvm/test/MC/Mips/cplocal-bad.s b/llvm/test/MC/Mips/cplocal-bad.s
new file mode 100644
index 0000000000000..0236fdb34db12
--- /dev/null
+++ b/llvm/test/MC/Mips/cplocal-bad.s
@@ -0,0 +1,20 @@
+# RUN: not llvm-mc -triple=mips-unknown-linux-gnu %s 2>&1 \
+# RUN:   | FileCheck -check-prefix=O32 %s
+# RUN: not llvm-mc -triple=mips64-unknown-linux-gnuabin32 %s 2>&1 \
+# RUN:   | FileCheck -check-prefix=NABI %s
+# RUN: not llvm-mc -triple=mips64-unknown-linux-gnu %s 2>&1 \
+# RUN:   | FileCheck -check-prefix=NABI %s
+
+  .text
+  .cplocal $32
+# O32:  :[[@LINE-1]]:{{[0-9]+}}: error: .cplocal is allowed only in N32 or N64 mode
+# NABI: :[[@LINE-2]]:{{[0-9]+}}: error: invalid register
+  .cplocal $foo
+# O32:  :[[@LINE-1]]:{{[0-9]+}}: error: .cplocal is allowed only in N32 or N64 mode
+# NABI: :[[@LINE-2]]:{{[0-9]+}}: error: expected register containing global pointer
+  .cplocal bar
+# O32:  :[[@LINE-1]]:{{[0-9]+}}: error: .cplocal is allowed only in N32 or N64 mode
+# NABI: :[[@LINE-2]]:{{[0-9]+}}: error: expected register containing global pointer
+  .cplocal $25 foobar
+# O32:  :[[@LINE-1]]:{{[0-9]+}}: error: .cplocal is allowed only in N32 or N64 mode
+# NABI: :[[@LINE-2]]:{{[0-9]+}}: error: unexpected token, expected end of statement
diff --git a/llvm/test/MC/Mips/cplocal.s b/llvm/test/MC/Mips/cplocal.s
new file mode 100644
index 0000000000000..bfd9ba560b5de
--- /dev/null
+++ b/llvm/test/MC/Mips/cplocal.s
@@ -0,0 +1,45 @@
+# RUN: llvm-mc -triple=mips64-unknown-linux-gnuabin32 -position-independent %s \
+# RUN:   | FileCheck -check-prefix=ASM-PIC32 %s
+# RUN: llvm-mc -triple=mips64-unknown-linux-gnu -position-independent %s \
+# RUN:   | FileCheck -check-prefix=ASM-PIC64 %s
+# RUN: llvm-mc -triple=mips64-unknown-linux-gnuabin32 %s \
+# RUN:   | FileCheck -check-prefix=ASM-NPIC %s
+# RUN: llvm-mc -triple=mips64-unknown-linux-gnu %s \
+# RUN:   | FileCheck -check-prefix=ASM-NPIC %s
+
+# RUN: llvm-mc -triple=mips64-unknown-linux-gnuabin32 \
+# RUN:         -position-independent -filetype=obj -o - %s \
+# RUN:   | llvm-objdump -d -r - | FileCheck -check-prefix=OBJ32 %s
+# RUN: llvm-mc -triple=mips64-unknown-linux-gnu \
+# RUN:         -position-independent -filetype=obj -o - %s \
+# RUN:   | llvm-objdump -d -r - | FileCheck -check-prefix=OBJ64 %s
+
+# ASM-PIC32:  .text
+# ASM-PIC32:  .cplocal $4
+# ASM-PIC32:  lw $25, %call16(foo)($4)
+# ASM-PIC32:  jalr $25
+
+# ASM-PIC64:  .text
+# ASM-PIC64:  .cplocal $4
+# ASM-PIC64:  ld $25, %call16(foo)($4)
+# ASM-PIC64:  jalr $25
+
+# ASM-NPIC:  .text
+# ASM-NPIC:  .cplocal $4
+# ASM-NPIC:  jal foo
+
+# OBJ32:   lw $25, 0($4)
+# OBJ32: R_MIPS_CALL16 foo
+# OBJ32:   jalr $25
+# OBJ32: R_MIPS_JALR foo
+
+# OBJ64:   ld $25, 0($4)
+# OBJ64: R_MIPS_CALL16/R_MIPS_NONE/R_MIPS_NONE foo
+# OBJ64:   jalr $25
+# OBJ64: R_MIPS_JALR/R_MIPS_NONE/R_MIPS_NONE foo
+
+  .text
+  .cplocal $4
+  jal foo
+foo:
+  nop

From 4c1e4408927e0c260beb30114ccaf48b017a5ecb Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 17 Jul 2019 08:11:40 +0000
Subject: [PATCH 320/451] [mips] Use mult/mflo pattern on 64-bit targets prior
 to MIPS64

The `MUL` instruction is available starting from the MIPS32/MIPS64 targets.

llvm-svn: 366301
---
 llvm/lib/Target/Mips/Mips64InstrInfo.td | 2 +-
 llvm/test/CodeGen/Mips/llvm-ir/mul.ll   | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/Mips/Mips64InstrInfo.td b/llvm/lib/Target/Mips/Mips64InstrInfo.td
index b5711004f7007..7f35280f79363 100644
--- a/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -853,7 +853,7 @@ def : MipsPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))),
               (SUBu GPR32:$src, GPR32:$src2), sub_32)>;
 def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))),
               (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
-              (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS3_NOT_32R6_64R6;
+              (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS32_NOT_32R6_64R6;
 def : MipsPat<(i64 (sext (i32 (MipsMFHI ACC64:$src)))),
               (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
               (PseudoMFHI ACC64:$src), sub_32)>;
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/llvm-ir/mul.ll
index b6f535abdee35..c75bda3f394ad 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/mul.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/mul.ll
@@ -155,6 +155,9 @@ entry:
   ; M2:         mult    $4, $5
   ; M2:         mflo    $2
 
+  ; M4:         mult    $4, $5
+  ; M4:         mflo    $1
+
   ; 32R1-R5:    mul     $2, $4, $5
   ; 32R6:       mul     $2, $4, $5
 

From 6d4b1c046b821179f0cdf126234a12704a2fbed7 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 17 Jul 2019 08:11:57 +0000
Subject: [PATCH 321/451] [mips] Name inline asm constraint test cases in a
 uniform manner. NFC

llvm-svn: 366302
---
 .../Mips/{inlineasm_constraint_R.ll => inlineasm-constraint-R.ll} | 0
 .../{inlineasm_constraint_ZC.ll => inlineasm-constraint-ZC-1.ll}  | 0
 ...{inlineasm-constraint_ZC_2.ll => inlineasm-constraint-ZC-2.ll} | 0
 ...lineasm-cnstrnt-bad-I-1.ll => inlineasm-constraint-bad-I-1.ll} | 0
 .../{inlineasm-cnstrnt-bad-J.ll => inlineasm-constraint-bad-J.ll} | 0
 .../{inlineasm-cnstrnt-bad-K.ll => inlineasm-constraint-bad-K.ll} | 0
 .../{inlineasm-cnstrnt-bad-L.ll => inlineasm-constraint-bad-L.ll} | 0
 .../{inlineasm-cnstrnt-bad-N.ll => inlineasm-constraint-bad-N.ll} | 0
 .../{inlineasm-cnstrnt-bad-O.ll => inlineasm-constraint-bad-O.ll} | 0
 .../{inlineasm-cnstrnt-bad-P.ll => inlineasm-constraint-bad-P.ll} | 0
 ...inlineasm-cnstrnt-bad-l1.ll => inlineasm-constraint-bad-l1.ll} | 0
 .../{inline-asm-i-constraint-i1.ll => inlineasm-constraint-i1.ll} | 0
 .../{inlineasm_constraint_m.ll => inlineasm-constraint-m-1.ll}    | 0
 .../Mips/{inlineasmmemop.ll => inlineasm-constraint-m-2.ll}       | 0
 .../Mips/{inlineasm_constraint_o.ll => inlineasm-constraint-o.ll} | 0
 .../{inlineasm-cnstrnt-reg.ll => inlineasm-constraint-reg.ll}     | 0
 .../{inlineasm-cnstrnt-reg64.ll => inlineasm-constraint-reg64.ll} | 0
 .../Mips/{inlineasm_constraint.ll => inlineasm-constraint.ll}     | 0
 18 files changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/CodeGen/Mips/{inlineasm_constraint_R.ll => inlineasm-constraint-R.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm_constraint_ZC.ll => inlineasm-constraint-ZC-1.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-constraint_ZC_2.ll => inlineasm-constraint-ZC-2.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-I-1.ll => inlineasm-constraint-bad-I-1.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-J.ll => inlineasm-constraint-bad-J.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-K.ll => inlineasm-constraint-bad-K.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-L.ll => inlineasm-constraint-bad-L.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-N.ll => inlineasm-constraint-bad-N.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-O.ll => inlineasm-constraint-bad-O.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-P.ll => inlineasm-constraint-bad-P.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-l1.ll => inlineasm-constraint-bad-l1.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inline-asm-i-constraint-i1.ll => inlineasm-constraint-i1.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm_constraint_m.ll => inlineasm-constraint-m-1.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasmmemop.ll => inlineasm-constraint-m-2.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm_constraint_o.ll => inlineasm-constraint-o.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-reg.ll => inlineasm-constraint-reg.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-reg64.ll => inlineasm-constraint-reg64.ll} (100%)
 rename llvm/test/CodeGen/Mips/{inlineasm_constraint.ll => inlineasm-constraint.ll} (100%)

diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_R.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-R.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm_constraint_R.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-R.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_ZC.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-1.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm_constraint_ZC.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-1.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-constraint_ZC_2.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-2.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-constraint_ZC_2.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-2.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-I-1.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-I-1.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-J.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-J.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-K.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-K.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-L.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-L.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-N.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-N.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-O.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-O.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-P.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-P.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-l1.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-l1.ll
diff --git a/llvm/test/CodeGen/Mips/inline-asm-i-constraint-i1.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-i1.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inline-asm-i-constraint-i1.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-i1.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_m.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-m-1.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm_constraint_m.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-m-1.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasmmemop.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-m-2.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasmmemop.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-m-2.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-o.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-o.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-reg.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-reg.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-reg64.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint-reg64.ll
diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint.ll
similarity index 100%
rename from llvm/test/CodeGen/Mips/inlineasm_constraint.ll
rename to llvm/test/CodeGen/Mips/inlineasm-constraint.ll

From 1292464125fd257114fc03384ac9cdf185ee1831 Mon Sep 17 00:00:00 2001
From: Simon Atanasyan <simon@atanasyan.com>
Date: Wed, 17 Jul 2019 08:12:03 +0000
Subject: [PATCH 322/451] [mips] Remove redundant test case. NFC

The `inlineasm-constraint-reg64.ll` test checks the same functionality.

llvm-svn: 366303
---
 llvm/test/CodeGen/Mips/inlineasm64.ll | 17 -----------------
 1 file changed, 17 deletions(-)
 delete mode 100644 llvm/test/CodeGen/Mips/inlineasm64.ll

diff --git a/llvm/test/CodeGen/Mips/inlineasm64.ll b/llvm/test/CodeGen/Mips/inlineasm64.ll
deleted file mode 100644
index 82abdf82a3ed3..0000000000000
--- a/llvm/test/CodeGen/Mips/inlineasm64.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck %s
-
-@gl2 = external global i64
-@gl1 = external global i64
-@gl0 = external global i64
-
-define void @foo1() nounwind {
-entry:
-; CHECK: foo1
-; CHECK: daddu
-  %0 = load i64, i64* @gl1, align 8
-  %1 = load i64, i64* @gl0, align 8
-  %2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind
-  store i64 %2, i64* @gl2, align 8
-  ret void
-}
-

From 42cfbafc1fe9813720d6456c91547432d33c2570 Mon Sep 17 00:00:00 2001
From: Rui Ueyama <ruiu@google.com>
Date: Wed, 17 Jul 2019 08:31:51 +0000
Subject: [PATCH 323/451] Replace CRLF with LF.

llvm-svn: 366304
---
 .../ClangTidyDiagnosticConsumerTest.cpp       | 188 +++++++++---------
 1 file changed, 94 insertions(+), 94 deletions(-)

diff --git a/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp b/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp
index 5547f98ffd030..67f1dfdf029ec 100644
--- a/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp
@@ -1,94 +1,94 @@
-#include "ClangTidy.h"
-#include "ClangTidyTest.h"
-#include "gtest/gtest.h"
-
-namespace clang {
-namespace tidy {
-namespace test {
-
-class TestCheck : public ClangTidyCheck {
-public:
-  TestCheck(StringRef Name, ClangTidyContext *Context)
-      : ClangTidyCheck(Name, Context) {}
-  void registerMatchers(ast_matchers::MatchFinder *Finder) override {
-    Finder->addMatcher(ast_matchers::varDecl().bind("var"), this);
-  }
-  void check(const ast_matchers::MatchFinder::MatchResult &Result) override {
-    const auto *Var = Result.Nodes.getNodeAs<VarDecl>("var");
-    // Add diagnostics in the wrong order.
-    diag(Var->getLocation(), "variable");
-    diag(Var->getTypeSpecStartLoc(), "type specifier");
-  }
-};
-
-TEST(ClangTidyDiagnosticConsumer, SortsErrors) {
-  std::vector<ClangTidyError> Errors;
-  runCheckOnCode<TestCheck>("int a;", &Errors);
-  EXPECT_EQ(2ul, Errors.size());
-  EXPECT_EQ("type specifier", Errors[0].Message.Message);
-  EXPECT_EQ("variable", Errors[1].Message.Message);
-}
-
-TEST(GlobList, Empty) {
-  GlobList Filter("");
-
-  EXPECT_TRUE(Filter.contains(""));
-  EXPECT_FALSE(Filter.contains("aaa"));
-}
-
-TEST(GlobList, Nothing) {
-  GlobList Filter("-*");
-
-  EXPECT_FALSE(Filter.contains(""));
-  EXPECT_FALSE(Filter.contains("a"));
-  EXPECT_FALSE(Filter.contains("-*"));
-  EXPECT_FALSE(Filter.contains("-"));
-  EXPECT_FALSE(Filter.contains("*"));
-}
-
-TEST(GlobList, Everything) {
-  GlobList Filter("*");
-
-  EXPECT_TRUE(Filter.contains(""));
-  EXPECT_TRUE(Filter.contains("aaaa"));
-  EXPECT_TRUE(Filter.contains("-*"));
-  EXPECT_TRUE(Filter.contains("-"));
-  EXPECT_TRUE(Filter.contains("*"));
-}
-
-TEST(GlobList, Simple) {
-  GlobList Filter("aaa");
-
-  EXPECT_TRUE(Filter.contains("aaa"));
-  EXPECT_FALSE(Filter.contains(""));
-  EXPECT_FALSE(Filter.contains("aa"));
-  EXPECT_FALSE(Filter.contains("aaaa"));
-  EXPECT_FALSE(Filter.contains("bbb"));
-}
-
-TEST(GlobList, WhitespacesAtBegin) {
-  GlobList Filter("-*,   a.b.*");
-
-  EXPECT_TRUE(Filter.contains("a.b.c"));
-  EXPECT_FALSE(Filter.contains("b.c"));
-}
-
-TEST(GlobList, Complex) {
-  GlobList Filter("*,-a.*, -b.*, \r  \n  a.1.* ,-a.1.A.*,-..,-...,-..+,-*$, -*qwe* ");
-
-  EXPECT_TRUE(Filter.contains("aaa"));
-  EXPECT_TRUE(Filter.contains("qqq"));
-  EXPECT_FALSE(Filter.contains("a."));
-  EXPECT_FALSE(Filter.contains("a.b"));
-  EXPECT_FALSE(Filter.contains("b."));
-  EXPECT_FALSE(Filter.contains("b.b"));
-  EXPECT_TRUE(Filter.contains("a.1.b"));
-  EXPECT_FALSE(Filter.contains("a.1.A.a"));
-  EXPECT_FALSE(Filter.contains("qwe"));
-  EXPECT_FALSE(Filter.contains("asdfqweasdf"));
-  EXPECT_TRUE(Filter.contains("asdfqwEasdf"));
-}
-
-} // namespace test
-} // namespace tidy
-} // namespace clang
+#include "ClangTidy.h"
+#include "ClangTidyTest.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace tidy {
+namespace test {
+
+class TestCheck : public ClangTidyCheck {
+public:
+  TestCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override {
+    Finder->addMatcher(ast_matchers::varDecl().bind("var"), this);
+  }
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override {
+    const auto *Var = Result.Nodes.getNodeAs<VarDecl>("var");
+    // Add diagnostics in the wrong order.
+    diag(Var->getLocation(), "variable");
+    diag(Var->getTypeSpecStartLoc(), "type specifier");
+  }
+};
+
+TEST(ClangTidyDiagnosticConsumer, SortsErrors) {
+  std::vector<ClangTidyError> Errors;
+  runCheckOnCode<TestCheck>("int a;", &Errors);
+  EXPECT_EQ(2ul, Errors.size());
+  EXPECT_EQ("type specifier", Errors[0].Message.Message);
+  EXPECT_EQ("variable", Errors[1].Message.Message);
+}
+
+TEST(GlobList, Empty) {
+  GlobList Filter("");
+
+  EXPECT_TRUE(Filter.contains(""));
+  EXPECT_FALSE(Filter.contains("aaa"));
+}
+
+TEST(GlobList, Nothing) {
+  GlobList Filter("-*");
+
+  EXPECT_FALSE(Filter.contains(""));
+  EXPECT_FALSE(Filter.contains("a"));
+  EXPECT_FALSE(Filter.contains("-*"));
+  EXPECT_FALSE(Filter.contains("-"));
+  EXPECT_FALSE(Filter.contains("*"));
+}
+
+TEST(GlobList, Everything) {
+  GlobList Filter("*");
+
+  EXPECT_TRUE(Filter.contains(""));
+  EXPECT_TRUE(Filter.contains("aaaa"));
+  EXPECT_TRUE(Filter.contains("-*"));
+  EXPECT_TRUE(Filter.contains("-"));
+  EXPECT_TRUE(Filter.contains("*"));
+}
+
+TEST(GlobList, Simple) {
+  GlobList Filter("aaa");
+
+  EXPECT_TRUE(Filter.contains("aaa"));
+  EXPECT_FALSE(Filter.contains(""));
+  EXPECT_FALSE(Filter.contains("aa"));
+  EXPECT_FALSE(Filter.contains("aaaa"));
+  EXPECT_FALSE(Filter.contains("bbb"));
+}
+
+TEST(GlobList, WhitespacesAtBegin) {
+  GlobList Filter("-*,   a.b.*");
+
+  EXPECT_TRUE(Filter.contains("a.b.c"));
+  EXPECT_FALSE(Filter.contains("b.c"));
+}
+
+TEST(GlobList, Complex) {
+  GlobList Filter("*,-a.*, -b.*, \r  \n  a.1.* ,-a.1.A.*,-..,-...,-..+,-*$, -*qwe* ");
+
+  EXPECT_TRUE(Filter.contains("aaa"));
+  EXPECT_TRUE(Filter.contains("qqq"));
+  EXPECT_FALSE(Filter.contains("a."));
+  EXPECT_FALSE(Filter.contains("a.b"));
+  EXPECT_FALSE(Filter.contains("b."));
+  EXPECT_FALSE(Filter.contains("b.b"));
+  EXPECT_TRUE(Filter.contains("a.1.b"));
+  EXPECT_FALSE(Filter.contains("a.1.A.a"));
+  EXPECT_FALSE(Filter.contains("qwe"));
+  EXPECT_FALSE(Filter.contains("asdfqweasdf"));
+  EXPECT_TRUE(Filter.contains("asdfqwEasdf"));
+}
+
+} // namespace test
+} // namespace tidy
+} // namespace clang

From 96627215729080f42488185b29e03e73cee08204 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Wed, 17 Jul 2019 08:37:00 +0000
Subject: [PATCH 324/451] [Driver] Enable __cxa_atexit on Solaris

Starting with Solaris 11.4 (which is now the required minimal version), Solaris does
support __cxa_atexit.  This patch reflects that.

One might consider removing the affected tests altogether instead of inverting them,
as is done on other targets.

Besides, this lets two ASan tests PASS:

  AddressSanitizer-i386-sunos :: TestCases/init-order-atexit.cc
  AddressSanitizer-i386-sunos-dynamic :: TestCases/init-order-atexit.cc

Tested on x86_64-pc-solaris2.11 and sparcv9-sun-solaris2.11.

Differential Revision: https://reviews.llvm.org/D64491

llvm-svn: 366305
---
 clang/lib/Driver/ToolChains/Clang.cpp | 1 -
 clang/test/Driver/cxa-atexit.cpp      | 2 +-
 clang/test/Driver/solaris-opts.c      | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index edc64581172ff..cb861f27aedab 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4749,7 +4749,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasFlag(
           options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit,
           !RawTriple.isOSWindows() &&
-              RawTriple.getOS() != llvm::Triple::Solaris &&
               TC.getArch() != llvm::Triple::xcore &&
               ((RawTriple.getVendor() != llvm::Triple::MipsTechnologies) ||
                RawTriple.hasEnvironment())) ||
diff --git a/clang/test/Driver/cxa-atexit.cpp b/clang/test/Driver/cxa-atexit.cpp
index ae955ea5a7dfc..336756dedcecd 100644
--- a/clang/test/Driver/cxa-atexit.cpp
+++ b/clang/test/Driver/cxa-atexit.cpp
@@ -19,7 +19,7 @@
 // RUN: %clang -### -target sparc-sun-solaris -c %s -o /dev/null 2>&1 | FileCheck %s -check-prefix CHECK-SOLARIS
 
 // CHECK-WINDOWS: "-fno-use-cxa-atexit"
-// CHECK-SOLARIS: "-fno-use-cxa-atexit"
+// CHECK-SOLARIS-NOT: "-fno-use-cxa-atexit"
 // CHECK-HEXAGON-NOT: "-fno-use-cxa-atexit"
 // CHECK-XCORE: "-fno-use-cxa-atexit"
 // CHECK-MTI: "-fno-use-cxa-atexit"
diff --git a/clang/test/Driver/solaris-opts.c b/clang/test/Driver/solaris-opts.c
index 8c54ae0a628d2..33d769efaa179 100644
--- a/clang/test/Driver/solaris-opts.c
+++ b/clang/test/Driver/solaris-opts.c
@@ -1,4 +1,4 @@
 // RUN: %clang %s --target=sparc-sun-solaris2.11 -### -o %t.o 2>&1 | FileCheck %s
 
-// CHECK: "-fno-use-cxa-atexit"
+// CHECK-NOT: "-fno-use-cxa-atexit"
 

From 9eb95903da4575b3b95d1a743d48e51bb5026ccd Mon Sep 17 00:00:00 2001
From: Marco Antognini <marco.antognini@arm.com>
Date: Wed, 17 Jul 2019 08:52:09 +0000
Subject: [PATCH 325/451] [OpenCL][Sema] Minor refactoring and constraint
 checking

Summary:
Simplify code a bit and add assertion to address post-landing comments
from D64083.

Subscribers: yaxunl, Anastasia, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64804

llvm-svn: 366306
---
 clang/lib/Sema/SemaExprCXX.cpp | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index df8638a013623..cdca2e8cac6b7 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -4216,17 +4216,12 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType,
     break;
 
   case ICK_Block_Pointer_Conversion: {
-    QualType LHSType = Context.getCanonicalType(ToType).getUnqualifiedType();
-    QualType RHSType = Context.getCanonicalType(FromType).getUnqualifiedType();
-
-    // Assumptions based on Sema::IsBlockPointerConversion.
-    assert(isa<BlockPointerType>(LHSType) && "BlockPointerType expected");
-    assert(isa<BlockPointerType>(RHSType) && "BlockPointerType expected");
-
     LangAS AddrSpaceL =
-        LHSType->getAs<BlockPointerType>()->getPointeeType().getAddressSpace();
+        ToType->castAs<BlockPointerType>()->getPointeeType().getAddressSpace();
     LangAS AddrSpaceR =
-        RHSType->getAs<BlockPointerType>()->getPointeeType().getAddressSpace();
+        FromType->castAs<BlockPointerType>()->getPointeeType().getAddressSpace();
+    assert(Qualifiers::isAddressSpaceSupersetOf(AddrSpaceL, AddrSpaceR) &&
+           "Invalid cast");
     CastKind Kind =
         AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast;
     From = ImpCastExprToType(From, ToType.getUnqualifiedType(), Kind,

From 2be0ebb0d80d75241ac7ef033153466317ff13a0 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Wed, 17 Jul 2019 09:23:04 +0000
Subject: [PATCH 326/451] [ELF] Delete redundant pageAlign at PT_GNU_RELRO
 boundaries after D58892

Summary:
After D58892 split the RW PT_LOAD on the PT_GNU_RELRO boundary, the new
layout is:

PT_LOAD(PT_GNU_RELRO(.data.rel.ro .bss.rel.ro)) PT_LOAD(.data .bss)

The two pageAlign() calls at PT_GNU_RELRO boundaries are redundant due
to the existence of PT_LOAD.

Reviewers: grimar, peter.smith, ruiu, espindola

Reviewed By: ruiu

Subscribers: sfertile, atanasyan, emaste, arichardson, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64854

llvm-svn: 366307
---
 lld/ELF/Writer.cpp | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 3cf7b056064f3..b8c8891648a46 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -2223,25 +2223,6 @@ template <class ELFT> void Writer<ELFT>::fixSectionAlignments() {
     for (const PhdrEntry *p : part.phdrs)
       if (p->p_type == PT_LOAD && p->firstSec)
         pageAlign(p->firstSec);
-
-    for (const PhdrEntry *p : part.phdrs) {
-      if (p->p_type != PT_GNU_RELRO)
-        continue;
-
-      if (p->firstSec)
-        pageAlign(p->firstSec);
-
-      // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we
-      // have to align it to a page.
-      auto end = outputSections.end();
-      auto i = llvm::find(outputSections, p->lastSec);
-      if (i == end || (i + 1) == end)
-        continue;
-
-      OutputSection *cmd = (*(i + 1));
-      if (needsPtLoad(cmd))
-        pageAlign(cmd);
-    }
   }
 }
 

From 37e403d18c1a6dc0121a2e67e56fa348934f2018 Mon Sep 17 00:00:00 2001
From: Diana Picus <diana.picus@linaro.org>
Date: Wed, 17 Jul 2019 10:01:27 +0000
Subject: [PATCH 327/451] [ARM GlobalISel] Cleanup CallLowering. NFC

Migrate CallLowering::lowerReturnVal to use the same infrastructure as
lowerCall/FormalArguments and remove the now obsolete code path from
splitToValueTypes.

Forgot to push this earlier.

llvm-svn: 366308
---
 llvm/lib/Target/ARM/ARMCallLowering.cpp | 84 ++++++-------------------
 llvm/lib/Target/ARM/ARMCallLowering.h   |  7 +--
 2 files changed, 20 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index fdd94ab25ef81..0cbe6e1871e4b 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -184,22 +184,21 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler {
 
 } // end anonymous namespace
 
-void ARMCallLowering::splitToValueTypes(
-    const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
-    MachineFunction &MF, const SplitArgTy &PerformArgSplit) const {
+void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+                                        SmallVectorImpl<ArgInfo> &SplitArgs,
+                                        MachineFunction &MF) const {
   const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
   const DataLayout &DL = MF.getDataLayout();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
 
   SmallVector<EVT, 4> SplitVTs;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, nullptr, nullptr, 0);
+  assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
 
   if (SplitVTs.size() == 1) {
     // Even if there is no splitting to do, we still want to replace the
     // original type (e.g. pointer type -> integer).
-    assert(OrigArg.Regs.size() == 1 && "Regs / types mismatch");
     auto Flags = OrigArg.Flags;
     unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty);
     Flags.setOrigAlign(OriginalAlignment);
@@ -208,34 +207,7 @@ void ARMCallLowering::splitToValueTypes(
     return;
   }
 
-  if (OrigArg.Regs.size() > 1) {
-    // Create one ArgInfo for each virtual register.
-    assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
-    for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
-      EVT SplitVT = SplitVTs[i];
-      Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
-      auto Flags = OrigArg.Flags;
-
-      unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy);
-      Flags.setOrigAlign(OriginalAlignment);
-
-      bool NeedsConsecutiveRegisters =
-          TLI.functionArgumentNeedsConsecutiveRegisters(
-              SplitTy, F.getCallingConv(), F.isVarArg());
-      if (NeedsConsecutiveRegisters) {
-        Flags.setInConsecutiveRegs();
-        if (i == e - 1)
-          Flags.setInConsecutiveRegsLast();
-      }
-
-      // FIXME: We also want to split SplitTy further.
-      Register PartReg = OrigArg.Regs[i];
-      SplitArgs.emplace_back(PartReg, SplitTy, Flags, OrigArg.IsFixed);
-    }
-
-    return;
-  }
-
+  // Create one ArgInfo for each virtual register.
   for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
     EVT SplitVT = SplitVTs[i];
     Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
@@ -253,10 +225,9 @@ void ARMCallLowering::splitToValueTypes(
         Flags.setInConsecutiveRegsLast();
     }
 
-    Register PartReg =
-        MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL));
-    SplitArgs.push_back(ArgInfo{PartReg, SplitTy, Flags, OrigArg.IsFixed});
-    PerformArgSplit(PartReg);
+    // FIXME: We also want to split SplitTy further.
+    Register PartReg = OrigArg.Regs[i];
+    SplitArgs.emplace_back(PartReg, SplitTy, Flags, OrigArg.IsFixed);
   }
 }
 
@@ -277,29 +248,17 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
   if (!isSupportedType(DL, TLI, Val->getType()))
     return false;
 
-  SmallVector<EVT, 4> SplitEVTs;
-  ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
-  assert(VRegs.size() == SplitEVTs.size() &&
-         "For each split Type there should be exactly one VReg.");
-
-  SmallVector<ArgInfo, 4> SplitVTs;
-  LLVMContext &Ctx = Val->getType()->getContext();
-  for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
-    ArgInfo CurArgInfo(VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx));
-    setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
-
-    SmallVector<Register, 4> Regs;
-    splitToValueTypes(CurArgInfo, SplitVTs, MF,
-                      [&](Register Reg) { Regs.push_back(Reg); });
-    if (Regs.size() > 1)
-      MIRBuilder.buildUnmerge(Regs, VRegs[i]);
-  }
+  ArgInfo OrigRetInfo(VRegs, Val->getType());
+  setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
+
+  SmallVector<ArgInfo, 4> SplitRetInfos;
+  splitToValueTypes(OrigRetInfo, SplitRetInfos, MF);
 
   CCAssignFn *AssignFn =
       TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
 
   OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
-  return handleAssignments(MIRBuilder, SplitVTs, RetHandler);
+  return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler);
 }
 
 bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -489,11 +448,9 @@ bool ARMCallLowering::lowerFormalArguments(
   unsigned Idx = 0;
   for (auto &Arg : F.args()) {
     ArgInfo OrigArgInfo(VRegs[Idx], Arg.getType());
-    setArgFlags(OrigArgInfo, Idx + AttributeList::FirstArgIndex, DL, F);
 
-    splitToValueTypes(OrigArgInfo, SplitArgInfos, MF, [&](Register Reg) {
-      llvm_unreachable("Args should already be split");
-    });
+    setArgFlags(OrigArgInfo, Idx + AttributeList::FirstArgIndex, DL, F);
+    splitToValueTypes(OrigArgInfo, SplitArgInfos, MF);
 
     Idx++;
   }
@@ -596,9 +553,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     if (Arg.Flags.isByVal())
       return false;
 
-    splitToValueTypes(Arg, ArgInfos, MF, [&](Register Reg) {
-      llvm_unreachable("Function args should already be split");
-    });
+    splitToValueTypes(Arg, ArgInfos, MF);
   }
 
   auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, IsVarArg);
@@ -614,10 +569,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       return false;
 
     ArgInfos.clear();
-    splitToValueTypes(OrigRet, ArgInfos, MF, [&](Register Reg) {
-      llvm_unreachable("Call results should already be split");
-    });
-
+    splitToValueTypes(OrigRet, ArgInfos, MF);
     auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, IsVarArg);
     CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
     if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.h b/llvm/lib/Target/ARM/ARMCallLowering.h
index d0f204491921d..794127b5ebc7e 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.h
+++ b/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -47,14 +47,11 @@ class ARMCallLowering : public CallLowering {
                       ArrayRef<Register> VRegs,
                       MachineInstrBuilder &Ret) const;
 
-  using SplitArgTy = std::function<void(unsigned Reg)>;
-
   /// Split an argument into one or more arguments that the CC lowering can cope
-  /// with (e.g. replace pointers with integers).
+  /// with.
   void splitToValueTypes(const ArgInfo &OrigArg,
                          SmallVectorImpl<ArgInfo> &SplitArgs,
-                         MachineFunction &MF,
-                         const SplitArgTy &PerformArgSplit) const;
+                         MachineFunction &MF) const;
 };
 
 } // end namespace llvm

From 247add6f3a37e73bfb01ab20d9fa3529655f4311 Mon Sep 17 00:00:00 2001
From: Owen Reynolds <gbreynoo@gmail.com>
Date: Wed, 17 Jul 2019 10:16:44 +0000
Subject: [PATCH 328/451] [llvm-ar][test] Add coverage for replace and update
 key letters

Some more tests to increase llvm-ar test coverage, this time for replace 'r' and update 'u'.

Differential Revision: https://reviews.llvm.org/D64803

llvm-svn: 366309
---
 llvm/test/tools/llvm-ar/replace-update.test | 189 ++++++++++++++++++++
 llvm/test/tools/llvm-ar/replace.test        | 178 ++++++++++++++++++
 2 files changed, 367 insertions(+)
 create mode 100644 llvm/test/tools/llvm-ar/replace-update.test
 create mode 100644 llvm/test/tools/llvm-ar/replace.test

diff --git a/llvm/test/tools/llvm-ar/replace-update.test b/llvm/test/tools/llvm-ar/replace-update.test
new file mode 100644
index 0000000000000..1b51dba1bca00
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/replace-update.test
@@ -0,0 +1,189 @@
+## Test that the replace command with "u" updates the relevant members.
+
+# RUN: rm -rf %t && mkdir -p %t/new/other
+
+# RUN: yaml2obj %s -o %t/1.o --docnum=1
+# RUN: yaml2obj %s -o %t/2.o --docnum=2
+# RUN: yaml2obj %s -o %t/3.o --docnum=3
+
+# RUN: env TZ=GMT touch -t 200001020304 %t/1.o
+# RUN: env TZ=GMT touch -t 200001020304 %t/2.o
+# RUN: env TZ=GMT touch -t 200001020304 %t/3.o
+
+# RUN: yaml2obj %s -o %t/new/1.o --docnum=4
+# RUN: yaml2obj %s -o %t/new/3.o --docnum=5
+
+# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6
+
+# RUN: env TZ=GMT touch -t 200001020304 %t/new/other/1.o
+
+## Replace single member with newer file:
+# RUN: llvm-ar rcU %t/single.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/single.a %t/new/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
+
+# SINGLE:      1.o
+# SINGLE-NEXT: 2.o
+# SINGLE-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+# SINGLE-SYM:      symbolnew1
+# SINGLE-SYM-NEXT: symbol2
+# SINGLE-SYM-NEXT: symbol3
+
+## Replace new single member with older file:
+# RUN: llvm-ar ruU %t/single.a %t/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+## Replace multiple members with newer files:
+# RUN: llvm-ar rcuU %t/multiple.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/multiple.a %t/new/1.o %t/new/3.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
+
+# MULTIPLE:      1.o
+# MULTIPLE-NEXT: 2.o
+# MULTIPLE-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+# MULTIPLE-SYM:      symbolnew1
+# MULTIPLE-SYM-NEXT: symbol2
+# MULTIPLE-SYM-NEXT: symbolnew3
+
+## Replace newer members with multiple older files:
+# RUN: llvm-ar ruU %t/multiple.a %t/1.o %t/2.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+## Replace same member with newer files:
+# RUN: llvm-ar rcuU %t/same.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/same.a %t/new/1.o %t/new/other/1.o
+# RUN: llvm-ar t %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}}
+
+# SAME:      1.o
+# SAME-NEXT: 2.o
+# SAME-NEXT: 3.o
+# SAME-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+# SAME-SYM:      symbolnew1
+# SAME-SYM-NEXT: symbol2
+# SAME-SYM-NEXT: symbol3
+# SAME-SYM-NEXT: symbolother1
+
+## Replace multiple members with an older file and a newer file:
+# RUN: llvm-ar rcuU %t/old-new.a %t/new/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/old-new.a %t/1.o %t/new/3.o
+# RUN: llvm-ar t %t/old-new.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
+
+# RUN: llvm-nm --print-armap %t/old-new.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+## Replace same member with an older file and a newer file:
+# RUN: llvm-ar rcuU %t/old-new-same.a %t/new/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/old-new-same.a %t/1.o %t/new/other/1.o
+# RUN: llvm-ar t %t/old-new-same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}}
+
+# RUN: llvm-nm --print-armap %t/old-new-same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol3
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolnew1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolnew3
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolother1
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/replace.test b/llvm/test/tools/llvm-ar/replace.test
new file mode 100644
index 0000000000000..2a7cddfdeb535
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/replace.test
@@ -0,0 +1,178 @@
+## Test that the replace command without modifiers replaces the relevant members.
+
+# RUN: rm -rf %t && mkdir -p %t/new/other
+# RUN: yaml2obj %s -o %t/1.o --docnum=1
+# RUN: yaml2obj %s -o %t/2.o --docnum=2
+# RUN: yaml2obj %s -o %t/3.o --docnum=3
+
+# RUN: yaml2obj %s -o %t/new/1.o --docnum=4
+# RUN: yaml2obj %s -o %t/new/3.o --docnum=5
+
+# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6
+
+## Replace single member:
+# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar r %t/single.a %t/new/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
+
+# SINGLE:      1.o
+# SINGLE-NEXT: 2.o
+# SINGLE-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+# SINGLE-SYM:      symbolnew1
+# SINGLE-SYM-NEXT: symbol2
+# SINGLE-SYM-NEXT: symbol3
+
+## Replace multiple members:
+# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar r %t/multiple.a %t/new/1.o %t/new/3.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
+
+# MULTIPLE:      1.o
+# MULTIPLE-NEXT: 2.o
+# MULTIPLE-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+# MULTIPLE-SYM:      symbolnew1
+# MULTIPLE-SYM-NEXT: symbol2
+# MULTIPLE-SYM-NEXT: symbolnew3
+
+## Replace same member:
+# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar r %t/same.a %t/new/1.o %t/new/other/1.o
+# RUN: llvm-ar t %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME  --implicit-check-not {{.}}
+
+# SAME:      1.o
+# SAME-NEXT: 2.o
+# SAME-NEXT: 3.o
+# SAME-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+# SAME-SYM:      symbolnew1
+# SAME-SYM-NEXT: symbol2
+# SAME-SYM-NEXT: symbol3
+# SAME-SYM-NEXT: symbolother1
+
+## Replace without member:
+# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar r %t/without.a
+# RUN: llvm-ar t %t/without.a \
+# RUN:   | FileCheck %s --check-prefix=WITHOUT --implicit-check-not {{.}}
+
+# WITHOUT:      1.o
+# WITHOUT-NEXT: 2.o
+# WITHOUT-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/without.a \
+# RUN:   | FileCheck %s --check-prefix=WITHOUT-SYM
+
+# WITHOUT-SYM:      symbol1
+# WITHOUT-SYM-NEXT: symbol2
+# WITHOUT-SYM-NEXT: symbol3
+
+## No archive:
+# RUN: not llvm-ar r 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
+
+# NO-ARCHIVE: error: An archive name must be specified.
+
+## Member does not exist:
+# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o
+# RUN: not llvm-ar r %t/missing.a %t/missing.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
+
+# MISSING-FILE: error: [[FILE]]: {{[Nn]}}o such file or directory.
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol3
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolnew1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolnew3
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolother1
+    Binding: STB_GLOBAL
+    Section: .text

From 11b06242a7e554f489c10bc9613be3e3c9e0c70a Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Wed, 17 Jul 2019 10:17:47 +0000
Subject: [PATCH 329/451] [clangd] Fix error message in tweaktests to be
 useful. NFC

llvm-svn: 366311
---
 clang-tools-extra/clangd/unittests/TweakTests.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp
index 7a0d88405b4f2..69f74e9d41b97 100644
--- a/clang-tools-extra/clangd/unittests/TweakTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Testing/Support/Error.h"
+#include "gmock/gmock-matchers.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <cassert>
@@ -134,10 +135,9 @@ void checkApplyContainsError(llvm::StringRef ID, llvm::StringRef Input,
   auto Result = apply(ID, Input);
   ASSERT_FALSE(Result) << "expected error message:\n   " << ErrorMessage <<
                        "\non input:" << Input;
-  EXPECT_NE(std::string::npos,
-            llvm::toString(Result.takeError()).find(ErrorMessage))
-            << "Wrong error message:\n  " << llvm::toString(Result.takeError())
-            << "\nexpected:\n  " << ErrorMessage;
+  EXPECT_THAT(llvm::toString(Result.takeError()),
+              testing::HasSubstr(ErrorMessage))
+      << Input;
 }
 
 TEST(TweakTest, SwapIfBranches) {

From 52c39396151978ca946e2a80d9118c8672bace14 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov@arm.com>
Date: Wed, 17 Jul 2019 10:53:13 +0000
Subject: [PATCH 330/451] [TableGen] Do not set ReadNone attribute on
 intrinsics with side effects

If an intrinsic is defined without outputs but has side effects, it can
still be removed completely from the program. This patch makes
TableGen not set Attribute::ReadNone for intrinsics which
are declared with IntrHasSideEffects.

Differential Revision: https://reviews.llvm.org/D64414

llvm-svn: 366312
---
 llvm/test/TableGen/intrin-side-effects.td  | 39 ++++++++++++++++++++++
 llvm/utils/TableGen/CodeGenDAGPatterns.cpp |  2 +-
 llvm/utils/TableGen/IntrinsicEmitter.cpp   |  4 ++-
 3 files changed, 43 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/TableGen/intrin-side-effects.td

diff --git a/llvm/test/TableGen/intrin-side-effects.td b/llvm/test/TableGen/intrin-side-effects.td
new file mode 100644
index 0000000000000..7588855830fae
--- /dev/null
+++ b/llvm/test/TableGen/intrin-side-effects.td
@@ -0,0 +1,39 @@
+// RUN: llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s | FileCheck %s
+
+// Get the minimum blurb necessary to process ...
+include "llvm/CodeGen/ValueTypes.td"
+include "llvm/CodeGen/SDNodeProperties.td"
+
+class LLVMType<ValueType vt> {
+  ValueType VT = vt;
+  int isAny = 0;
+}
+
+def llvm_i32_ty        : LLVMType<i32>;
+
+class IntrinsicProperty;
+def IntrNoMem : IntrinsicProperty;
+def IntrHasSideEffects : IntrinsicProperty;
+
+
+class Intrinsic<list<LLVMType> ret_types,
+                list<LLVMType> param_types = [],
+                list<IntrinsicProperty> intr_properties = [],
+                string name = "",
+                list<SDNodeProperty> sd_properties = []> : SDPatternOperator {
+  string LLVMName = name;
+  string TargetPrefix = "";
+  list<LLVMType> RetTypes = ret_types;
+  list<LLVMType> ParamTypes = param_types;
+  list<IntrinsicProperty> IntrProperties = intr_properties;
+  let Properties = sd_properties;
+
+  bit isTarget = 0;
+}
+
+// ... this intrinsic.
+def int_random_gen   : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
+
+// CHECK: 1, // llvm.random.gen
+// CHECK: case 1:
+// CHECK-NEXT: Atts[] = {Attribute::NoUnwind}
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index a0e8696001b0e..c8f710d66a036 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -2807,7 +2807,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
     // chain.
     if (Int.IS.RetVTs.empty())
       Operator = getDAGPatterns().get_intrinsic_void_sdnode();
-    else if (Int.ModRef != CodeGenIntrinsic::NoMem)
+    else if (Int.ModRef != CodeGenIntrinsic::NoMem || Int.hasSideEffects)
       // Has side-effects, requires chain.
       Operator = getDAGPatterns().get_intrinsic_w_chain_sdnode();
     else // Otherwise, no chain.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index bcb8af2fc56bf..6bcdc3d777a2d 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -685,7 +685,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
     }
 
     if (!intrinsic.canThrow ||
-        intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem ||
+        (intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem && !intrinsic.hasSideEffects) ||
         intrinsic.isNoReturn || intrinsic.isCold || intrinsic.isNoDuplicate ||
         intrinsic.isConvergent || intrinsic.isSpeculatable) {
       OS << "      const Attribute::AttrKind Atts[] = {";
@@ -727,6 +727,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
 
       switch (intrinsic.ModRef) {
       case CodeGenIntrinsic::NoMem:
+        if (intrinsic.hasSideEffects)
+          break;
         if (addComma)
           OS << ",";
         OS << "Attribute::ReadNone";

From a256b8b7d77cdc9cf4675dcd784d008cb545b10b Mon Sep 17 00:00:00 2001
From: Nicolai Haehnle <nhaehnle@gmail.com>
Date: Wed, 17 Jul 2019 11:22:19 +0000
Subject: [PATCH 331/451] AMDGPU: Improve alias analysis for GDS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary: GDS cannot alias anything else.

Original patch by: Marek Olšák

Reviewers: arsenm, mareko

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64114

Change-Id: I07bfbd96f5d5c37a6dfba7997df12f291dd794b0
llvm-svn: 366313
---
 .../lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp |  8 +--
 .../CodeGen/AMDGPU/amdgpu-alias-analysis.ll   | 51 ++++++++++++++++---
 2 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index de54db3b1b02d..bba132c3bc46f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -56,13 +56,13 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
 // These arrays are indexed by address space value enum elements 0 ... to 7
 static const AliasResult ASAliasRules[8][8] = {
   /*                    Flat       Global    Region    Group     Constant  Private   Constant 32-bit  Buffer Fat Ptr */
-  /* Flat     */        {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias,        MayAlias},
+  /* Flat     */        {MayAlias, MayAlias, NoAlias,  MayAlias, MayAlias, MayAlias, MayAlias,        MayAlias},
   /* Global   */        {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias,        MayAlias},
-  /* Region   */        {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , MayAlias,        NoAlias},
+  /* Region   */        {NoAlias,  NoAlias , MayAlias, NoAlias , NoAlias,  NoAlias , NoAlias,         NoAlias},
   /* Group    */        {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias ,        NoAlias},
-  /* Constant */        {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , MayAlias,        MayAlias},
+  /* Constant */        {MayAlias, MayAlias, NoAlias,  NoAlias , NoAlias , NoAlias , MayAlias,        MayAlias},
   /* Private  */        {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias ,        NoAlias},
-  /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias ,        MayAlias},
+  /* Constant 32-bit */ {MayAlias, MayAlias, NoAlias,  NoAlias , MayAlias, NoAlias , NoAlias ,        MayAlias},
   /* Buffer Fat Ptr  */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias,        MayAlias}
 };
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
index 1f64208cf99d3..25ec7af9d2318 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
@@ -51,42 +51,77 @@ define void @test_999_1(i8 addrspace(999)* %p, i8 addrspace(1)* %p1) {
   ret void
 }
 
+; CHECK: NoAlias:  i8 addrspace(2)* %p, i8* %p1
+define void @test_region_vs_flat(i8 addrspace(2)* %p, i8 addrspace(0)* %p1) {
+  ret void
+}
+
+; CHECK: NoAlias:  i8 addrspace(1)* %p1, i8 addrspace(2)* %p
+define void @test_region_vs_global(i8 addrspace(2)* %p, i8 addrspace(1)* %p1) {
+  ret void
+}
+
+; CHECK: MayAlias: i8 addrspace(2)* %p, i8 addrspace(2)* %p1
+define void @test_region(i8 addrspace(2)* %p, i8 addrspace(2)* %p1) {
+  ret void
+}
+
+; CHECK: NoAlias:  i8 addrspace(2)* %p, i8 addrspace(3)* %p1
+define void @test_region_vs_group(i8 addrspace(2)* %p, i8 addrspace(3)* %p1) {
+  ret void
+}
+
+; CHECK: NoAlias:  i8 addrspace(2)* %p, i8 addrspace(4)* %p1
+define void @test_region_vs_constant(i8 addrspace(2)* %p, i8 addrspace(4)* %p1) {
+  ret void
+}
+
+; CHECK: NoAlias:  i8 addrspace(2)* %p, i8 addrspace(5)* %p1
+define void @test_region_vs_private(i8 addrspace(2)* %p, i8 addrspace(5)* %p1) {
+  ret void
+}
+
+; CHECK: NoAlias:  i8 addrspace(2)* %p, i8 addrspace(6)* %p1
+define void @test_region_vs_const32(i8 addrspace(2)* %p, i8 addrspace(6)* %p1) {
+  ret void
+}
+
 ; CHECK: MayAlias:  i8 addrspace(7)* %p, i8* %p1
 define void @test_7_0(i8 addrspace(7)* %p, i8 addrspace(0)* %p1) {
-    ret void
+  ret void
 }
 
 ; CHECK: MayAlias:  i8 addrspace(1)* %p1, i8 addrspace(7)* %p
 define void @test_7_1(i8 addrspace(7)* %p, i8 addrspace(1)* %p1) {
-    ret void
+  ret void
 }
 
 ; CHECK: NoAlias:  i8 addrspace(2)* %p1, i8 addrspace(7)* %p
 define void @test_7_2(i8 addrspace(7)* %p, i8 addrspace(2)* %p1) {
-    ret void
+  ret void
 }
 
 ; CHECK: NoAlias:  i8 addrspace(3)* %p1, i8 addrspace(7)* %p
 define void @test_7_3(i8 addrspace(7)* %p, i8 addrspace(3)* %p1) {
-    ret void
+  ret void
 }
 
 ; CHECK: MayAlias:  i8 addrspace(4)* %p1, i8 addrspace(7)* %p
 define void @test_7_4(i8 addrspace(7)* %p, i8 addrspace(4)* %p1) {
-    ret void
+  ret void
 }
 
 ; CHECK: NoAlias:  i8 addrspace(5)* %p1, i8 addrspace(7)* %p
 define void @test_7_5(i8 addrspace(7)* %p, i8 addrspace(5)* %p1) {
-    ret void
+  ret void
 }
 
 ; CHECK: MayAlias:  i8 addrspace(6)* %p1, i8 addrspace(7)* %p
 define void @test_7_6(i8 addrspace(7)* %p, i8 addrspace(6)* %p1) {
-    ret void
+  ret void
 }
 
 ; CHECK: MayAlias:  i8 addrspace(7)* %p, i8 addrspace(7)* %p1
 define void @test_7_7(i8 addrspace(7)* %p, i8 addrspace(7)* %p1) {
-    ret void
+  ret void
 }

From 8b7041a5c6f0a373d4886ca807d89790ad6dedab Mon Sep 17 00:00:00 2001
From: Nicolai Haehnle <nhaehnle@gmail.com>
Date: Wed, 17 Jul 2019 11:22:57 +0000
Subject: [PATCH 332/451] AMDGPU/GFX10: Apply the VMEM-to-scalar-write hazard
 also to writes to EXEC

Summary: Change-Id: I854fbf7d48e937bef9f8f3f5d0c8aeb970652630

Reviewers: rampitec, mareko

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64807

Change-Id: I4405b3a7f84186acea5a78d291bff71056e745fc
llvm-svn: 366314
---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp     |  2 +-
 .../AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll    |  1 +
 llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir   | 14 ++++++++++++++
 llvm/test/CodeGen/AMDGPU/wave32.ll                 |  2 ++
 4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a23348e18f92d..885239e2faed3 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -920,7 +920,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
 
     for (const MachineOperand &Def : MI->defs()) {
       MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
-      if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
+      if (!Op)
         continue;
       return true;
     }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
index f35b0b43d3694..0f04c0c445f53 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
@@ -14,6 +14,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    global_store_dword v[0:1], v0, off
 ; GCN-NEXT:  BB0_2: ; %bb
+; GCN-NEXT:    v_nop
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    global_store_dword v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
index 630070c13a310..9d45c5b19e656 100644
--- a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir
@@ -92,6 +92,7 @@ body:             |
 ...
 # GCN-LABEL: name: vmem_write_exec_impread
 # GCN:      BUFFER_LOAD_DWORD_OFFEN
+# GCN:      V_NOP
 # GCN-NEXT: S_MOV_B64
 ---
 name:            vmem_write_exec_impread
@@ -208,3 +209,16 @@ body:             |
     $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
     S_BRANCH %bb.0
 ...
+# GCN-LABEL: name: ds_write_exec
+# GCN:      DS_WRITE_B32_gfx9
+# GCN-NEXT: V_NOP
+# GCN-NEXT: S_MOV_B32
+---
+name:            ds_write_exec
+body:             |
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1 = IMPLICIT_DEF
+    DS_WRITE_B32_gfx9 $vgpr0, $vgpr1, 0, 0, implicit $exec
+    $exec_lo = S_MOV_B32 -1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 781ebbb268fec..a71ca5db7658d 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1073,6 +1073,7 @@ declare void @external_void_func_void() #1
 ; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
 ; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}}
 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT: v_nop
 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
 ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]]
 
@@ -1095,6 +1096,7 @@ declare void @external_void_func_void() #1
 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
 ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT: v_nop
 ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
 ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]]
 ; GCN-NEXT: s_waitcnt vmcnt(0)

From e14cfe2d2ea2d0ef7eccc7905dbe01e943fea9e8 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov@arm.com>
Date: Wed, 17 Jul 2019 11:24:37 +0000
Subject: [PATCH 333/451] [AArch64] Consistent types and naming for AArch64
 target features (NFC)

Differential Revision: https://reviews.llvm.org/D64415

Committed as obvious.

llvm-svn: 366315
---
 clang/lib/Basic/Targets/AArch64.cpp | 34 ++++++++++++++---------------
 clang/lib/Basic/Targets/AArch64.h   | 15 +++++++------
 2 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index a02530ad06756..74ac69ab8946a 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -199,13 +199,13 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (FPU & SveMode)
     Builder.defineMacro("__ARM_FEATURE_SVE", "1");
 
-  if (CRC)
+  if (HasCRC)
     Builder.defineMacro("__ARM_FEATURE_CRC32", "1");
 
-  if (Crypto)
+  if (HasCrypto)
     Builder.defineMacro("__ARM_FEATURE_CRYPTO", "1");
 
-  if (Unaligned)
+  if (HasUnaligned)
     Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1");
 
   if ((FPU & NeonMode) && HasFullFP16)
@@ -263,13 +263,13 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
 bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
                                              DiagnosticsEngine &Diags) {
   FPU = FPUMode;
-  CRC = 0;
-  Crypto = 0;
-  Unaligned = 1;
-  HasFullFP16 = 0;
-  HasDotProd = 0;
-  HasFP16FML = 0;
-  HasMTE = 0;
+  HasCRC = false;
+  HasCrypto = false;
+  HasUnaligned = true;
+  HasFullFP16 = false;
+  HasDotProd = false;
+  HasFP16FML = false;
+  HasMTE = false;
   ArchKind = llvm::AArch64::ArchKind::ARMV8A;
 
   for (const auto &Feature : Features) {
@@ -278,11 +278,11 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
     if (Feature == "+sve")
       FPU |= SveMode;
     if (Feature == "+crc")
-      CRC = 1;
+      HasCRC = true;
     if (Feature == "+crypto")
-      Crypto = 1;
+      HasCrypto = true;
     if (Feature == "+strict-align")
-      Unaligned = 0;
+      HasUnaligned = false;
     if (Feature == "+v8.1a")
       ArchKind = llvm::AArch64::ArchKind::ARMV8_1A;
     if (Feature == "+v8.2a")
@@ -294,13 +294,13 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
     if (Feature == "+v8.5a")
       ArchKind = llvm::AArch64::ArchKind::ARMV8_5A;
     if (Feature == "+fullfp16")
-      HasFullFP16 = 1;
+      HasFullFP16 = true;
     if (Feature == "+dotprod")
-      HasDotProd = 1;
+      HasDotProd = true;
     if (Feature == "+fp16fml")
-      HasFP16FML = 1;
+      HasFP16FML = true;
     if (Feature == "+mte")
-      HasMTE = 1;
+      HasMTE = true;
   }
 
   setDataLayout();
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index de0aed78e037e..5833c146003b0 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -28,13 +28,14 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   enum FPUModeEnum { FPUMode, NeonMode = (1 << 0), SveMode = (1 << 1) };
 
   unsigned FPU;
-  unsigned CRC;
-  unsigned Crypto;
-  unsigned Unaligned;
-  unsigned HasFullFP16;
-  unsigned HasDotProd;
-  unsigned HasFP16FML;
-  unsigned HasMTE;
+  bool HasCRC;
+  bool HasCrypto;
+  bool HasUnaligned;
+  bool HasFullFP16;
+  bool HasDotProd;
+  bool HasFP16FML;
+  bool HasMTE;
+
   llvm::AArch64::ArchKind ArchKind;
 
   static const Builtin::Info BuiltinInfo[];

From 60bd7a9c13bdb6da854c4a7b595407c0ce18b55d Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Wed, 17 Jul 2019 11:48:29 +0000
Subject: [PATCH 334/451] [lldb][NFC] Tablegenify watchpoint commands

Part of the project that migrates these struct initializers to our
new lldb-tablegen.

llvm-svn: 366316
---
 .../Commands/CommandObjectWatchpoint.cpp      | 17 +++------
 .../CommandObjectWatchpointCommand.cpp        |  8 +---
 lldb/source/Commands/Options.td               | 38 +++++++++++++++++++
 3 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp
index 3c3bd2d12095a..98e758b7ef6a7 100644
--- a/lldb/source/Commands/CommandObjectWatchpoint.cpp
+++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp
@@ -148,11 +148,8 @@ bool CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs(
 #pragma mark List::CommandOptions
 
 static constexpr OptionDefinition g_watchpoint_list_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_1, false, "brief",   'b', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Give a brief description of the watchpoint (no location info)." },
-  { LLDB_OPT_SET_2, false, "full",    'f', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Give a full description of the watchpoint and its locations." },
-  { LLDB_OPT_SET_3, false, "verbose", 'v', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Explain everything we know about the watchpoint (for debugging debugger bugs)." }
-    // clang-format on
+#define LLDB_OPTIONS_watchpoint_list
+#include "CommandOptions.inc"
 };
 
 #pragma mark List
@@ -511,9 +508,8 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed {
 
 #pragma mark Ignore::CommandOptions
 static constexpr OptionDefinition g_watchpoint_ignore_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, true, "ignore-count", 'i', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeCount, "Set the number of times this watchpoint is skipped before stopping." }
-    // clang-format on
+#define LLDB_OPTIONS_watchpoint_ignore
+#include "CommandOptions.inc"
 };
 
 class CommandObjectWatchpointIgnore : public CommandObjectParsed {
@@ -631,9 +627,8 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed {
 #pragma mark Modify::CommandOptions
 
 static constexpr OptionDefinition g_watchpoint_modify_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "condition", 'c', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeExpression, "The watchpoint stops only if this condition expression evaluates to true." }
-    // clang-format on
+#define LLDB_OPTIONS_watchpoint_modify
+#include "CommandOptions.inc"
 };
 
 #pragma mark Modify
diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
index 8be6688fc3a69..2be0b5b154e0f 100644
--- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
+++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
@@ -43,12 +43,8 @@ static constexpr OptionEnumValues ScriptOptionEnum() {
 }
 
 static constexpr OptionDefinition g_watchpoint_command_add_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_1,   false, "one-liner",       'o', OptionParser::eRequiredArgument, nullptr, {},                 0, eArgTypeOneLiner,       "Specify a one-line watchpoint command inline. Be sure to surround it with quotes." },
-  { LLDB_OPT_SET_ALL, false, "stop-on-error",   'e', OptionParser::eRequiredArgument, nullptr, {},                 0, eArgTypeBoolean,        "Specify whether watchpoint command execution should terminate on error." },
-  { LLDB_OPT_SET_ALL, false, "script-type",     's', OptionParser::eRequiredArgument, nullptr, ScriptOptionEnum(), 0, eArgTypeNone,           "Specify the language for the commands - if none is specified, the lldb command interpreter will be used." },
-  { LLDB_OPT_SET_2,   false, "python-function", 'F', OptionParser::eRequiredArgument, nullptr, {},                 0, eArgTypePythonFunction, "Give the name of a Python function to run as command for this watchpoint. Be sure to give a module name if appropriate." }
-    // clang-format on
+#define LLDB_OPTIONS_watchpoint_command_add
+#include "CommandOptions.inc"
 };
 
 class CommandObjectWatchpointCommandAdd : public CommandObjectParsed,
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 1f4c09c386096..1d1bbbf7b7041 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -51,3 +51,41 @@ let Command = "breakpoint list" in {
     Desc<"List Dummy breakpoints - i.e. breakpoints set before a file is "
     "provided, which prime new targets.">;
 }
+
+let Command = "watchpoint list" in {
+  def watchpoint_list_brief : Option<"brief", "b">, Group<1>, Desc<"Give a "
+    "brief description of the watchpoint (no location info).">;
+  def watchpoint_list_full : Option<"full", "f">, Group<2>, Desc<"Give a full "
+    "description of the watchpoint and its locations.">;
+  def watchpoint_list_verbose : Option<"verbose", "v">, Group<3>, Desc<"Explain"
+    " everything we know about the watchpoint (for debugging debugger bugs).">;
+}
+
+let Command = "watchpoint ignore" in {
+  def watchpoint_ignore_ignore_count : Option<"ignore-count", "i">,
+    Arg<"Count">, Required, Desc<"Set the number of times this watchpoint is"
+    " skipped before stopping.">;
+}
+
+let Command = "watchpoint modify" in {
+  def watchpoint_modify_condition : Option<"condition", "c">, Arg<"Expression">,
+    Desc<"The watchpoint stops only if this condition expression evaluates "
+    "to true.">;
+}
+
+let Command = "watchpoint command add" in {
+  def watchpoint_command_add_one_liner : Option<"one-liner", "o">, Group<1>,
+    Arg<"OneLiner">, Desc<"Specify a one-line watchpoint command inline. Be "
+    "sure to surround it with quotes.">;
+  def watchpoint_command_add_stop_on_error : Option<"stop-on-error", "e">,
+    Arg<"Boolean">, Desc<"Specify whether watchpoint command execution should "
+    "terminate on error.">;
+  def watchpoint_command_add_script_type : Option<"script-type", "s">,
+    EnumArg<"None", "ScriptOptionEnum()">, Desc<"Specify the language for the"
+    " commands - if none is specified, the lldb command interpreter will be "
+    "used.">;
+  def watchpoint_command_add_python_function : Option<"python-function", "F">,
+    Group<2>, Arg<"PythonFunction">, Desc<"Give the name of a Python function "
+    "to run as command for this watchpoint. Be sure to give a module name if "
+    "appropriate.">;
+}

From 1e62635d0551578bf3899d2a1f4c835e30f2eed8 Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic@rt-rk.com>
Date: Wed, 17 Jul 2019 12:08:01 +0000
Subject: [PATCH 335/451] [MIPS GlobalISel] ClampScalar and select pointer
 G_ICMP

Add narrowScalar to half of original size for G_ICMP.
ClampScalar G_ICMP's operands 2 and 3 to s32.
Select G_ICMP for pointers for MIPS32. Pointer compare is same
as for integers, it is enough to declare them as legal type.

Differential Revision: https://reviews.llvm.org/D64856

llvm-svn: 366317
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  36 ++
 llvm/lib/Target/Mips/MipsLegalizerInfo.cpp    |   3 +-
 .../GlobalISel/instruction-select/icmp.mir    | 168 +++---
 .../Mips/GlobalISel/legalizer/icmp.mir        | 487 +++++++++++++-----
 .../CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll   | 335 +++++++++---
 .../Mips/GlobalISel/regbankselect/icmp.mir    | 280 +---------
 6 files changed, 754 insertions(+), 555 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index bf3cca4115a13..958e9b59cf443 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -747,6 +747,42 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_ICMP: { // Split a 2*NarrowSize compare into halves.
+    uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+    if (NarrowSize * 2 != SrcSize) // only exact halving is supported
+      return UnableToLegalize;
+
+    Observer.changingInstr(MI);
+    Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
+    Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
+    MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
+
+    Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
+    Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
+    MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
+
+    CmpInst::Predicate Pred =
+        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+    if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
+      MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
+      MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
+      MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
+      MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+      MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
+    } else { // ordered predicate: high halves decide unless they are equal
+      const LLT s1 = LLT::scalar(1);
+      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
+      MachineInstrBuilder CmpHEQ =
+          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
+      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
+          ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
+      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
+    }
+    Observer.changedInstr(MI);
+    MI.eraseFromParent(); // fully replaced by the instructions built above
+    return Legalized;
+  }
   }
 }
 
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index 621f3e54a04bf..e442a81837edf 100644
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -81,7 +81,8 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
     .minScalar(1, s32);
 
   getActionDefinitionsBuilder(G_ICMP)
-      .legalFor({{s32, s32}})
+      .legalForCartesianProduct({s32}, {s32, p0})
+      .clampScalar(1, s32, s32)
       .minScalar(0, s32);
 
   getActionDefinitionsBuilder(G_CONSTANT)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/icmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/icmp.mir
index 02086b0c86f61..0e6f1211b2b81 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/icmp.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/icmp.mir
@@ -2,20 +2,22 @@
 # RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
 --- |
 
-  define void @eq() {entry: ret void}
-  define void @ne() {entry: ret void}
-  define void @sgt() {entry: ret void}
-  define void @sge() {entry: ret void}
-  define void @slt() {entry: ret void}
-  define void @sle() {entry: ret void}
-  define void @ugt() {entry: ret void}
-  define void @uge() {entry: ret void}
-  define void @ult() {entry: ret void}
-  define void @ule() {entry: ret void}
+  define void @eq_i32() {entry: ret void}
+  define void @ne_i32() {entry: ret void}
+  define void @sgt_i32() {entry: ret void}
+  define void @sge_i32() {entry: ret void}
+  define void @slt_i32() {entry: ret void}
+  define void @sle_i32() {entry: ret void}
+  define void @ugt_i32() {entry: ret void}
+  define void @uge_i32() {entry: ret void}
+  define void @ult_i32() {entry: ret void}
+  define void @ule_i32() {entry: ret void}
+  define void @eq_ptr() {entry: ret void}
+
 
 ...
 ---
-name:            eq
+name:            eq_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -24,28 +26,24 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: eq
+    ; MIPS32-LABEL: name: eq_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[XOR:%[0-9]+]]:gpr32 = XOR [[COPY]], [[COPY1]]
     ; MIPS32: [[SLTiu:%[0-9]+]]:gpr32 = SLTiu [[XOR]], 1
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTiu]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[SLTiu]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(eq), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ne
+name:            ne_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -54,28 +52,24 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: ne
+    ; MIPS32-LABEL: name: ne_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[XOR:%[0-9]+]]:gpr32 = XOR [[COPY]], [[COPY1]]
     ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu $zero, [[XOR]]
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTu]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[SLTu]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(ne), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            sgt
+name:            sgt_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -84,27 +78,23 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: sgt
+    ; MIPS32-LABEL: name: sgt_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[SLT:%[0-9]+]]:gpr32 = SLT [[COPY1]], [[COPY]]
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLT]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[SLT]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(sgt), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            sge
+name:            sge_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -113,28 +103,24 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: sge
+    ; MIPS32-LABEL: name: sge_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[SLT:%[0-9]+]]:gpr32 = SLT [[COPY]], [[COPY1]]
     ; MIPS32: [[XORi:%[0-9]+]]:gpr32 = XORi [[SLT]], 1
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[XORi]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[XORi]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(sge), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            slt
+name:            slt_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -143,27 +129,23 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: slt
+    ; MIPS32-LABEL: name: slt_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[SLT:%[0-9]+]]:gpr32 = SLT [[COPY]], [[COPY1]]
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLT]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[SLT]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(slt), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            sle
+name:            sle_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -172,28 +154,24 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: sle
+    ; MIPS32-LABEL: name: sle_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[SLT:%[0-9]+]]:gpr32 = SLT [[COPY1]], [[COPY]]
     ; MIPS32: [[XORi:%[0-9]+]]:gpr32 = XORi [[SLT]], 1
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[XORi]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[XORi]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(sle), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ugt
+name:            ugt_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -202,27 +180,23 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: ugt
+    ; MIPS32-LABEL: name: ugt_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu [[COPY1]], [[COPY]]
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTu]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[SLTu]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(ugt), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            uge
+name:            uge_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -231,28 +205,24 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: uge
+    ; MIPS32-LABEL: name: uge_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu [[COPY]], [[COPY1]]
     ; MIPS32: [[XORi:%[0-9]+]]:gpr32 = XORi [[SLTu]], 1
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[XORi]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[XORi]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(uge), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ult
+name:            ult_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -261,27 +231,23 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: ult
+    ; MIPS32-LABEL: name: ult_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu [[COPY]], [[COPY1]]
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTu]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[SLTu]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(ult), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ule
+name:            ule_i32
 alignment:       2
 legalized:       true
 regBankSelected: true
@@ -290,22 +256,44 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: ule
+    ; MIPS32-LABEL: name: ule_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
     ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu [[COPY1]], [[COPY]]
     ; MIPS32: [[XORi:%[0-9]+]]:gpr32 = XORi [[SLTu]], 1
-    ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1
-    ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[XORi]], [[ORi]]
-    ; MIPS32: $v0 = COPY [[AND]]
+    ; MIPS32: $v0 = COPY [[XORi]]
     ; MIPS32: RetRA implicit $v0
     %0:gprb(s32) = COPY $a0
     %1:gprb(s32) = COPY $a1
     %4:gprb(s32) = G_ICMP intpred(ule), %0(s32), %1
-    %5:gprb(s32) = G_CONSTANT i32 1
-    %6:gprb(s32) = COPY %4(s32)
-    %3:gprb(s32) = G_AND %6, %5
+    %3:gprb(s32) = COPY %4(s32)
+    $v0 = COPY %3(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            eq_ptr
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1
+
+    ; MIPS32-LABEL: name: eq_ptr
+    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
+    ; MIPS32: [[XOR:%[0-9]+]]:gpr32 = XOR [[COPY]], [[COPY1]]
+    ; MIPS32: [[SLTiu:%[0-9]+]]:gpr32 = SLTiu [[XOR]], 1
+    ; MIPS32: $v0 = COPY [[SLTiu]]
+    ; MIPS32: RetRA implicit $v0
+    %0:gprb(p0) = COPY $a0
+    %1:gprb(p0) = COPY $a1
+    %4:gprb(s32) = G_ICMP intpred(eq), %0(p0), %1
+    %3:gprb(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir
index 7f0196f902267..c60767a1afc9a 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir
@@ -2,276 +2,503 @@
 # RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
 --- |
 
-  define void @eq() {entry: ret void}
-  define void @ne() {entry: ret void}
-  define void @sgt() {entry: ret void}
-  define void @sge() {entry: ret void}
-  define void @slt() {entry: ret void}
-  define void @sle() {entry: ret void}
-  define void @ugt() {entry: ret void}
-  define void @uge() {entry: ret void}
-  define void @ult() {entry: ret void}
-  define void @ule() {entry: ret void}
+  define void @ne_i32() {entry: ret void}
+  define void @eq_ptr() {entry: ret void}
+  define void @ult_i8() {entry: ret void}
+  define void @slt_i16() {entry: ret void}
+  define void @eq_i64() {entry: ret void}
+  define void @ne_i64() {entry: ret void}
+  define void @sgt_i64() {entry: ret void}
+  define void @sge_i64() {entry: ret void}
+  define void @slt_i64() {entry: ret void}
+  define void @sle_i64() {entry: ret void}
+  define void @ugt_i64() {entry: ret void}
+  define void @uge_i64() {entry: ret void}
+  define void @ult_i64() {entry: ret void}
+  define void @ule_i64() {entry: ret void}
 
 ...
 ---
-name:            eq
+name:            ne_i32
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: eq
+    ; MIPS32-LABEL: name: ne_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
     ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: $v0 = COPY [[COPY2]](s32)
     ; MIPS32: RetRA implicit $v0
     %0:_(s32) = COPY $a0
     %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(eq), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
+    %2:_(s1) = G_ICMP intpred(ne), %0(s32), %1
+    %3:_(s32) = G_ANYEXT %2(s1)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ne
+name:            eq_ptr
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: ne
+    ; MIPS32-LABEL: name: eq_ptr
     ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]]
     ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: $v0 = COPY [[COPY2]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(ne), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(s1) = G_ICMP intpred(eq), %0(p0), %1
+    %3:_(s32) = G_ANYEXT %2(s1)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            sgt
+name:            ult_i8
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: sgt
+    ; MIPS32-LABEL: name: ult_i8
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
     ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: $v0 = COPY [[COPY4]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(sgt), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
-    $v0 = COPY %3(s32)
+    %2:_(s32) = COPY $a0
+    %0:_(s8) = G_TRUNC %2(s32)
+    %3:_(s32) = COPY $a1
+    %1:_(s8) = G_TRUNC %3(s32)
+    %4:_(s1) = G_ICMP intpred(ult), %0(s8), %1
+    %5:_(s32) = G_ANYEXT %4(s1)
+    $v0 = COPY %5(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            sge
+name:            slt_i16
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: sge
+    ; MIPS32-LABEL: name: slt_i16
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY]](s32), [[COPY1]]
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+    ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32)
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; MIPS32: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32)
+    ; MIPS32: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32)
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: $v0 = COPY [[COPY4]](s32)
+    ; MIPS32: RetRA implicit $v0
+    %2:_(s32) = COPY $a0
+    %0:_(s16) = G_TRUNC %2(s32)
+    %3:_(s32) = COPY $a1
+    %1:_(s16) = G_TRUNC %3(s32)
+    %4:_(s1) = G_ICMP intpred(slt), %0(s16), %1
+    %5:_(s32) = G_ANYEXT %4(s1)
+    $v0 = COPY %5(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            eq_i64
+alignment:       2
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2, $a3
+
+    ; MIPS32-LABEL: name: eq_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
+    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]]
+    ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]]
+    ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]]
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: $v0 = COPY [[COPY4]](s32)
+    ; MIPS32: RetRA implicit $v0
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(eq), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ne_i64
+alignment:       2
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2, $a3
+
+    ; MIPS32-LABEL: name: ne_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
+    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]]
+    ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]]
+    ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]]
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s32), [[C]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: $v0 = COPY [[COPY4]](s32)
+    ; MIPS32: RetRA implicit $v0
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(ne), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            sgt_i64
+alignment:       2
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2, $a3
+
+    ; MIPS32-LABEL: name: sgt_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
+    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]]
+    ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; MIPS32: $v0 = COPY [[COPY7]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(sge), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
-    $v0 = COPY %3(s32)
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(sgt), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            slt
+name:            sge_i64
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
-    liveins: $a0, $a1
+    liveins: $a0, $a1, $a2, $a3
 
-    ; MIPS32-LABEL: name: slt
-    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32-LABEL: name: sge_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]]
+    ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; MIPS32: $v0 = COPY [[COPY7]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(slt), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
-    $v0 = COPY %3(s32)
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(sge), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            sle
+name:            slt_i64
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
-    liveins: $a0, $a1
+    liveins: $a0, $a1, $a2, $a3
 
-    ; MIPS32-LABEL: name: sle
-    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32-LABEL: name: slt_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[COPY]](s32), [[COPY1]]
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]]
+    ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; MIPS32: $v0 = COPY [[COPY7]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(sle), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
-    $v0 = COPY %3(s32)
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(slt), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ugt
+name:            sle_i64
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
-    liveins: $a0, $a1
+    liveins: $a0, $a1, $a2, $a3
 
-    ; MIPS32-LABEL: name: ugt
-    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32-LABEL: name: sle_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]]
+    ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; MIPS32: $v0 = COPY [[COPY7]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(ugt), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
-    $v0 = COPY %3(s32)
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(sle), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            uge
+name:            ugt_i64
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
-    liveins: $a0, $a1
+    liveins: $a0, $a1, $a2, $a3
 
-    ; MIPS32-LABEL: name: uge
-    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32-LABEL: name: ugt_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY1]]
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]]
+    ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; MIPS32: $v0 = COPY [[COPY7]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(uge), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
-    $v0 = COPY %3(s32)
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(ugt), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ult
+name:            uge_i64
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
-    liveins: $a0, $a1
+    liveins: $a0, $a1, $a2, $a3
 
-    ; MIPS32-LABEL: name: ult
-    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32-LABEL: name: uge_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]]
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]]
+    ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; MIPS32: $v0 = COPY [[COPY7]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(ult), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
-    $v0 = COPY %3(s32)
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(uge), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ule
+name:            ult_i64
 alignment:       2
 tracksRegLiveness: true
 body:             |
   bb.1.entry:
-    liveins: $a0, $a1
+    liveins: $a0, $a1, $a2, $a3
 
-    ; MIPS32-LABEL: name: ule
-    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32-LABEL: name: ult_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
     ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY1]]
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]]
+    ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; MIPS32: $v0 = COPY [[COPY7]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %2:_(s1) = G_ICMP intpred(ule), %0(s32), %1
-    %3:_(s32) = G_ZEXT %2(s1)
-    $v0 = COPY %3(s32)
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(ult), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
     RetRA implicit $v0
 
 ...
+---
+name:            ule_i64
+alignment:       2
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1, $a2, $a3
 
+    ; MIPS32-LABEL: name: ule_i64
+    ; MIPS32: liveins: $a0, $a1, $a2, $a3
+    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]]
+    ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32)
+    ; MIPS32: $v0 = COPY [[COPY7]](s32)
+    ; MIPS32: RetRA implicit $v0
+    %2:_(s32) = COPY $a0
+    %3:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+    %4:_(s32) = COPY $a2
+    %5:_(s32) = COPY $a3
+    %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+    %6:_(s1) = G_ICMP intpred(ule), %0(s64), %1
+    %7:_(s32) = G_ANYEXT %6(s1)
+    $v0 = COPY %7(s32)
+    RetRA implicit $v0
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
index fc5f3971af1a9..bb098761f3348 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
@@ -1,148 +1,343 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc  -O0 -mtriple=mipsel-linux-gnu -global-isel  -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32
 
-define i32 @eq(i32 %a, i32 %b){
-; MIPS32-LABEL: eq:
+define i1 @eq_i32(i32 %a, i32 %b){
+; MIPS32-LABEL: eq_i32:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    xor $1, $4, $5
-; MIPS32-NEXT:    sltiu $1, $1, 1
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    sltiu $2, $1, 1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp eq i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @ne(i32 %a, i32 %b) {
-; MIPS32-LABEL: ne:
+define i1 @ne_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: ne_i32:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    xor $1, $4, $5
-; MIPS32-NEXT:    sltu $1, $zero, $1
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    sltu $2, $zero, $1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp ne i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @sgt(i32 %a, i32 %b) {
-; MIPS32-LABEL: sgt:
+define i1 @sgt_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: sgt_i32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    slt $1, $5, $4
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    slt $2, $5, $4
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp sgt i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @sge(i32 %a, i32 %b) {
-; MIPS32-LABEL: sge:
+define i1 @sge_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: sge_i32:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    slt $1, $4, $5
-; MIPS32-NEXT:    xori $1, $1, 1
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    xori $2, $1, 1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp sge i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @slt(i32 %a, i32 %b) {
-; MIPS32-LABEL: slt:
+define i1 @slt_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: slt_i32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    slt $1, $4, $5
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    slt $2, $4, $5
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp slt i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @sle(i32 %a, i32 %b) {
-; MIPS32-LABEL: sle:
+define i1 @sle_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: sle_i32:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    slt $1, $5, $4
-; MIPS32-NEXT:    xori $1, $1, 1
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    xori $2, $1, 1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp sle i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @ugt(i32 %a, i32 %b) {
-; MIPS32-LABEL: ugt:
+define i1 @ugt_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: ugt_i32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sltu $1, $5, $4
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    sltu $2, $5, $4
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp ugt i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @uge(i32 %a, i32 %b) {
-; MIPS32-LABEL: uge:
+define i1 @uge_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: uge_i32:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    sltu $1, $4, $5
-; MIPS32-NEXT:    xori $1, $1, 1
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    xori $2, $1, 1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp uge i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @ult(i32 %a, i32 %b) {
-; MIPS32-LABEL: ult:
+define i1 @ult_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: ult_i32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sltu $1, $4, $5
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    sltu $2, $4, $5
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp ult i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
 }
 
-define i32 @ule(i32 %a, i32 %b) {
-; MIPS32-LABEL: ule:
+define i1 @ule_i32(i32 %a, i32 %b) {
+; MIPS32-LABEL: ule_i32:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    sltu $1, $5, $4
-; MIPS32-NEXT:    xori $1, $1, 1
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    xori $2, $1, 1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
   %cmp = icmp ule i32 %a, %b
-  %conv = zext i1 %cmp to i32
-  ret i32 %conv
+  ret i1 %cmp
+}
+
+define i1 @eq_ptr(i32* %a, i32* %b){
+; MIPS32-LABEL: eq_ptr:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    xor $1, $4, $5
+; MIPS32-NEXT:    sltiu $2, $1, 1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp eq i32* %a, %b
+  ret i1 %cmp
+}
+
+define i1 @ult_i8(i8 %a, i8 %b) {
+; MIPS32-LABEL: ult_i8:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    ori $1, $zero, 255
+; MIPS32-NEXT:    and $2, $4, $1
+; MIPS32-NEXT:    and $1, $5, $1
+; MIPS32-NEXT:    sltu $2, $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp ult i8 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @slt_i16(i16 %a, i16 %b) {
+; MIPS32-LABEL: slt_i16:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    sll $1, $4, 16
+; MIPS32-NEXT:    sra $1, $1, 16
+; MIPS32-NEXT:    sll $2, $5, 16
+; MIPS32-NEXT:    sra $2, $2, 16
+; MIPS32-NEXT:    slt $2, $1, $2
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp slt i16 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @eq_i64(i64 %a, i64 %b){
+; MIPS32-LABEL: eq_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    xor $1, $4, $6
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    or $1, $1, $2
+; MIPS32-NEXT:    ori $2, $zero, 0
+; MIPS32-NEXT:    xor $1, $1, $2
+; MIPS32-NEXT:    sltiu $2, $1, 1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp eq i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @ne_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: ne_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    xor $1, $4, $6
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    or $1, $1, $2
+; MIPS32-NEXT:    ori $2, $zero, 0
+; MIPS32-NEXT:    xor $1, $1, $2
+; MIPS32-NEXT:    sltu $2, $zero, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp ne i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @sgt_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: sgt_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    slt $1, $7, $5
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    sltiu $2, $2, 1
+; MIPS32-NEXT:    sltu $3, $6, $4
+; MIPS32-NEXT:    ori $4, $zero, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    movn $1, $3, $2
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp sgt i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @sge_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: sge_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    slt $1, $5, $7
+; MIPS32-NEXT:    xori $1, $1, 1
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    sltiu $2, $2, 1
+; MIPS32-NEXT:    sltu $3, $4, $6
+; MIPS32-NEXT:    xori $3, $3, 1
+; MIPS32-NEXT:    ori $4, $zero, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    movn $1, $3, $2
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp sge i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @slt_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: slt_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    slt $1, $5, $7
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    sltiu $2, $2, 1
+; MIPS32-NEXT:    sltu $3, $4, $6
+; MIPS32-NEXT:    ori $4, $zero, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    movn $1, $3, $2
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp slt i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @sle_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: sle_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    slt $1, $7, $5
+; MIPS32-NEXT:    xori $1, $1, 1
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    sltiu $2, $2, 1
+; MIPS32-NEXT:    sltu $3, $6, $4
+; MIPS32-NEXT:    xori $3, $3, 1
+; MIPS32-NEXT:    ori $4, $zero, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    movn $1, $3, $2
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp sle i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @ugt_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: ugt_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    sltu $1, $7, $5
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    sltiu $2, $2, 1
+; MIPS32-NEXT:    sltu $3, $6, $4
+; MIPS32-NEXT:    ori $4, $zero, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    movn $1, $3, $2
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp ugt i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @uge_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: uge_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    sltu $1, $5, $7
+; MIPS32-NEXT:    xori $1, $1, 1
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    sltiu $2, $2, 1
+; MIPS32-NEXT:    sltu $3, $4, $6
+; MIPS32-NEXT:    xori $3, $3, 1
+; MIPS32-NEXT:    ori $4, $zero, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    movn $1, $3, $2
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp uge i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @ult_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: ult_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    sltu $1, $5, $7
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    sltiu $2, $2, 1
+; MIPS32-NEXT:    sltu $3, $4, $6
+; MIPS32-NEXT:    ori $4, $zero, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    movn $1, $3, $2
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp ult i64 %a, %b
+  ret i1 %cmp
+}
+
+define i1 @ule_i64(i64 %a, i64 %b) {
+; MIPS32-LABEL: ule_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    sltu $1, $7, $5
+; MIPS32-NEXT:    xori $1, $1, 1
+; MIPS32-NEXT:    xor $2, $5, $7
+; MIPS32-NEXT:    sltiu $2, $2, 1
+; MIPS32-NEXT:    sltu $3, $6, $4
+; MIPS32-NEXT:    xori $3, $3, 1
+; MIPS32-NEXT:    ori $4, $zero, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    movn $1, $3, $2
+; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %cmp = icmp ule i64 %a, %b
+  ret i1 %cmp
 }
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/icmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/icmp.mir
index d03113a5be208..797c76a3cd61b 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/icmp.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/icmp.mir
@@ -2,20 +2,12 @@
 # RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
 --- |
 
-  define void @eq() {entry: ret void}
-  define void @ne() {entry: ret void}
-  define void @sgt() {entry: ret void}
-  define void @sge() {entry: ret void}
-  define void @slt() {entry: ret void}
-  define void @sle() {entry: ret void}
-  define void @ugt() {entry: ret void}
-  define void @uge() {entry: ret void}
-  define void @ult() {entry: ret void}
-  define void @ule() {entry: ret void}
+  define void @ne_i32() {entry: ret void}
+  define void @eq_ptr() {entry: ret void}
 
 ...
 ---
-name:            eq
+name:            ne_i32
 alignment:       2
 legalized:       true
 tracksRegLiveness: true
@@ -23,231 +15,24 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: eq
-    ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
-    ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(eq), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
-    $v0 = COPY %3(s32)
-    RetRA implicit $v0
-
-...
----
-name:            ne
-alignment:       2
-legalized:       true
-tracksRegLiveness: true
-body:             |
-  bb.1.entry:
-    liveins: $a0, $a1
-
-    ; MIPS32-LABEL: name: ne
+    ; MIPS32-LABEL: name: ne_i32
     ; MIPS32: liveins: $a0, $a1
     ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
     ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
     ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
     ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: $v0 = COPY [[COPY2]](s32)
     ; MIPS32: RetRA implicit $v0
     %0:_(s32) = COPY $a0
     %1:_(s32) = COPY $a1
     %4:_(s32) = G_ICMP intpred(ne), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
-    $v0 = COPY %3(s32)
-    RetRA implicit $v0
-
-...
----
-name:            sgt
-alignment:       2
-legalized:       true
-tracksRegLiveness: true
-body:             |
-  bb.1.entry:
-    liveins: $a0, $a1
-
-    ; MIPS32-LABEL: name: sgt
-    ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
-    ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(sgt), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
-    $v0 = COPY %3(s32)
-    RetRA implicit $v0
-
-...
----
-name:            sge
-alignment:       2
-legalized:       true
-tracksRegLiveness: true
-body:             |
-  bb.1.entry:
-    liveins: $a0, $a1
-
-    ; MIPS32-LABEL: name: sge
-    ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(sge), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
-    ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(sge), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
-    $v0 = COPY %3(s32)
-    RetRA implicit $v0
-
-...
----
-name:            slt
-alignment:       2
-legalized:       true
-tracksRegLiveness: true
-body:             |
-  bb.1.entry:
-    liveins: $a0, $a1
-
-    ; MIPS32-LABEL: name: slt
-    ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
-    ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(slt), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
-    $v0 = COPY %3(s32)
-    RetRA implicit $v0
-
-...
----
-name:            sle
-alignment:       2
-legalized:       true
-tracksRegLiveness: true
-body:             |
-  bb.1.entry:
-    liveins: $a0, $a1
-
-    ; MIPS32-LABEL: name: sle
-    ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(sle), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
-    ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(sle), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
-    $v0 = COPY %3(s32)
-    RetRA implicit $v0
-
-...
----
-name:            ugt
-alignment:       2
-legalized:       true
-tracksRegLiveness: true
-body:             |
-  bb.1.entry:
-    liveins: $a0, $a1
-
-    ; MIPS32-LABEL: name: ugt
-    ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
-    ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(ugt), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
-    $v0 = COPY %3(s32)
-    RetRA implicit $v0
-
-...
----
-name:            uge
-alignment:       2
-legalized:       true
-tracksRegLiveness: true
-body:             |
-  bb.1.entry:
-    liveins: $a0, $a1
-
-    ; MIPS32-LABEL: name: uge
-    ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
-    ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(uge), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
+    %3:_(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 
 ...
 ---
-name:            ult
+name:            eq_ptr
 alignment:       2
 legalized:       true
 tracksRegLiveness: true
@@ -255,51 +40,18 @@ body:             |
   bb.1.entry:
     liveins: $a0, $a1
 
-    ; MIPS32-LABEL: name: ult
+    ; MIPS32-LABEL: name: eq_ptr
     ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
+    ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
+    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]]
     ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
+    ; MIPS32: $v0 = COPY [[COPY2]](s32)
     ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(ult), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
-    $v0 = COPY %3(s32)
-    RetRA implicit $v0
-
-...
----
-name:            ule
-alignment:       2
-legalized:       true
-tracksRegLiveness: true
-body:             |
-  bb.1.entry:
-    liveins: $a0, $a1
-
-    ; MIPS32-LABEL: name: ule
-    ; MIPS32: liveins: $a0, $a1
-    ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0
-    ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1
-    ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY1]]
-    ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1
-    ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32)
-    ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]]
-    ; MIPS32: $v0 = COPY [[AND]](s32)
-    ; MIPS32: RetRA implicit $v0
-    %0:_(s32) = COPY $a0
-    %1:_(s32) = COPY $a1
-    %4:_(s32) = G_ICMP intpred(ule), %0(s32), %1
-    %5:_(s32) = G_CONSTANT i32 1
-    %6:_(s32) = COPY %4(s32)
-    %3:_(s32) = G_AND %6, %5
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %4:_(s32) = G_ICMP intpred(eq), %0(p0), %1
+    %3:_(s32) = COPY %4(s32)
     $v0 = COPY %3(s32)
     RetRA implicit $v0
 

From 5214956eaaa10a92794514558525ef6934486e90 Mon Sep 17 00:00:00 2001
From: Justin Hibbits <jrh29@alumni.cwru.edu>
Date: Wed, 17 Jul 2019 12:30:04 +0000
Subject: [PATCH 336/451] PowerPC/SPE: Fix load/store handling for SPE

Summary:
Pointed out in a comment for D49754, register spilling will currently
spill SPE registers at almost any offset.  However, the instructions
`evstdd` and `evldd` require a) 8-byte alignment, and b) a limit of 256
(unsigned) bytes from the base register, as the offset must fix into a
5-bit offset, which ranges from 0-31 (indexed in double-words).

The update to the register spill test is taken partially from the test
case shown in D49754.

Additionally, pointed out by Kei Thomsen, globals will currently use
evldd/evstdd, though the offset isn't known at compile time, so may
exceed the 8-bit (unsigned) offset permitted.  This fixes that as well,
by forcing it to always use evlddx/evstddx when accessing globals.

Part of the patch contributed by Kei Thomsen.

Reviewers: nemanjai, hfinkel, joerg

Subscribers: kbarton, jsji, llvm-commits

Differential Revision: https://reviews.llvm.org/D54409

llvm-svn: 366318
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 23 +++++++++++++++++++
 llvm/lib/Target/PowerPC/PPCISelLowering.h   |  5 +++++
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp |  8 ++++++-
 llvm/test/CodeGen/PowerPC/spe.ll            | 25 ++++++++++++++++++---
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d014e0070950a..24d50074860d7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2233,6 +2233,25 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
   return isIntS16Immediate(Op.getNode(), Imm);
 }
 
+
+/// SelectAddressEVXRegReg - If any user of N is a memory operation whose memory
+/// type is f64, set Base/Index to operands 0 and 1 of N and return true.
+bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
+                                               SDValue &Index,
+                                               SelectionDAG &DAG) const {
+  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+      UI != E; ++UI) {
+    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+      if (Memop->getMemoryVT() == MVT::f64) {
+          Base = N.getOperand(0);
+          Index = N.getOperand(1);
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// SelectAddressRegReg - Given the specified addressed, check to see if it
 /// can be represented as an indexed [r+r] operation.  Returns false if it
 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
@@ -2244,6 +2263,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                             unsigned EncodingAlignment) const {
   int16_t imm = 0;
   if (N.getOpcode() == ISD::ADD) {
+    // Is there any SPE load/store (f64), which can't handle 16bit offset?
+    // SPE load/store can only handle 8-bit offsets.
+    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
+        return true;
     if (isIntS16Immediate(N.getOperand(1), imm) &&
         (!EncodingAlignment || !(imm % EncodingAlignment)))
       return false; // r+i
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 73c6dcd7c859e..97422c6eda360 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -669,6 +669,11 @@ namespace llvm {
                                    ISD::MemIndexedMode &AM,
                                    SelectionDAG &DAG) const override;
 
+    /// SelectAddressEVXRegReg - Given the specified address, check to see if
+    /// it can be represented as an indexed [r+r] operation.
+    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
+                                SelectionDAG &DAG) const;
+
     /// SelectAddressRegReg - Given the specified addressed, check to see if it
     /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
     /// is non-zero, only accept displacement which is not suitable for [r+imm].
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 32b3d7e61d056..0498812050261 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -938,6 +938,9 @@ static unsigned offsetMinAlignForOpcode(unsigned OpC) {
   case PPC::STXSD:
   case PPC::STXSSP:
     return 4;
+  case PPC::EVLDD:
+  case PPC::EVSTDD:
+    return 8;
   case PPC::LXV:
   case PPC::STXV:
     return 16;
@@ -1060,7 +1063,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // happen in invalid code.
   assert(OpC != PPC::DBG_VALUE &&
          "This should be handled in a target-independent way");
-  if (!noImmForm && ((isInt<16>(Offset) &&
+  bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
+                            isUInt<8>(Offset) :
+                            isInt<16>(Offset);
+  if (!noImmForm && ((OffsetFitsMnemonic &&
                       ((Offset % offsetMinAlign(MI)) == 0)) ||
                      OpC == TargetOpcode::STACKMAP ||
                      OpC == TargetOpcode::PATCHPOINT)) {
diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll
index 06915ceb2dbab..bd03fa30e6a70 100644
--- a/llvm/test/CodeGen/PowerPC/spe.ll
+++ b/llvm/test/CodeGen/PowerPC/spe.ll
@@ -523,18 +523,37 @@ entry:
 ; CHECK: #NO_APP
 }
 
-define double @test_spill(double %a) nounwind {
+declare double @test_spill_spe_regs(double, double);
+define dso_local void @test_func2() #0 {
 entry:
+  ret void
+}
+
+declare void @test_memset(i8* nocapture writeonly, i8, i32, i1)
+@global_var1 = global i32 0, align 4
+define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) nounwind {
+entry:
+  %v1 = alloca [13 x i32], align 4
+  %v2 = alloca [11 x i32], align 4
   %0 = fadd double %a, %a
-  call void asm sideeffect "","~{r0},~{r3},~{s4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  call void asm sideeffect "","~{s0},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind
   %1 = fadd double %0, 3.14159
+  %2 = bitcast [13 x i32]* %v1 to i8*
+  call void @test_memset(i8* align 4 %2, i8 0, i32 24, i1 true)
+  store i32 0, i32* %a5, align 4
+  call void @test_func2()
+  %3 = bitcast [11 x i32]* %v2 to i8*
+  call void @test_memset(i8* align 4 %3, i8 0, i32 20, i1 true)
   br label %return
 
 return:
   ret double %1
 
 ; CHECK-LABEL: test_spill
-; CHECK: efdadd
+; CHECK: li [[VREG:[0-9]+]], 256
+; CHECK: evstddx {{[0-9]+}}, {{[0-9]+}}, [[VREG]]
+; CHECK-NOT: evstdd {{[0-9]+}}, 256({{[0-9]+}}
 ; CHECK: evstdd
+; CHECK: efdadd
 ; CHECK: evldd
 }

From 0257c6b659f1a81e6071b606da49c91d5d586c7d Mon Sep 17 00:00:00 2001
From: Justin Hibbits <jrh29@alumni.cwru.edu>
Date: Wed, 17 Jul 2019 12:30:48 +0000
Subject: [PATCH 337/451] PowerPC: Fix register spilling for SPE registers

Summary:
Missed in the original commit, use the correct callee-saved register
list for spilling, instead of the standard SVR432 list.  This avoids
needlessly spilling the SPE non-volatile registers when they're not used.

As part of this, also add where missing, and sort, the spill opcode
checks for SPE and SPE4 register classes.

Reviewers: nemanjai, hfinkel, joerg

Subscribers: kbarton, jsji, llvm-commits

Differential Revision: https://reviews.llvm.org/D56703

llvm-svn: 366319
---
 llvm/lib/Target/PowerPC/PPCCallingConv.td   | 15 ++++--
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp    |  2 +
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 55 +++++++++++++--------
 3 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index ee367214dd7a9..369b9ce1a711e 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -366,15 +366,22 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
 // and value may be altered by inter-library calls.
 // Do not include r12 as it is used as a scratch register.
 // Do not include return registers r3, f1, v2.
-def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
-                                          (sequence "R%u", 14, 31),
-                                          F0, (sequence "F%u", 2, 31),
-                                          (sequence "CR%u", 0, 7))>;
+def CSR_SVR32_ColdCC_Common : CalleeSavedRegs<(add (sequence "R%u", 4, 10),
+                                                (sequence "R%u", 14, 31),
+                                                (sequence "CR%u", 0, 7))>;
+
+def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+                                          F0, (sequence "F%u", 2, 31))>;
+
 
 def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC,
                                             (sequence "V%u", 0, 1),
                                             (sequence "V%u", 3, 31))>;
 
+def CSR_SVR32_ColdCC_SPE : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common,
+                                            (sequence "S%u", 4, 10),
+                                            (sequence "S%u", 14, 31))>;
+
 def CSR_SVR64_ColdCC : CalleeSavedRegs<(add  (sequence "X%u", 4, 10),
                                              (sequence "X%u", 14, 31),
                                              F0, (sequence "F%u", 2, 31),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index ea406a20df8ae..a787bdd56b9d7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1009,6 +1009,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opc = PPC::QVFMRb;
   else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::CROR;
+  else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg))
+    Opc = PPC::OR;
   else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
     Opc = PPC::EVOR;
   else
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 0498812050261..12554ea8d0797 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -159,30 +159,39 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
     return CSR_SRV464_TLS_PE_SaveList;
 
-  if (Subtarget.hasSPE())
-    return CSR_SVR432_SPE_SaveList;
-
   // On PPC64, we might need to save r2 (but only if it is not reserved).
   bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
 
+  // Cold calling convention CSRs.
   if (MF->getFunction().getCallingConv() == CallingConv::Cold) {
-    return TM.isPPC64()
-               ? (Subtarget.hasAltivec()
-                      ? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
-                                : CSR_SVR64_ColdCC_Altivec_SaveList)
-                      : (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
-                                : CSR_SVR64_ColdCC_SaveList))
-               : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList
-                                         : CSR_SVR32_ColdCC_SaveList);
+    if (TM.isPPC64()) {
+      if (Subtarget.hasAltivec())
+        return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
+                      : CSR_SVR64_ColdCC_Altivec_SaveList;
+      return SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
+                    : CSR_SVR64_ColdCC_SaveList;
+    }
+    // 32-bit targets.
+    if (Subtarget.hasAltivec())
+      return CSR_SVR32_ColdCC_Altivec_SaveList;
+    else if (Subtarget.hasSPE())
+      return CSR_SVR32_ColdCC_SPE_SaveList;
+    return CSR_SVR32_ColdCC_SaveList;
   }
-
-  return TM.isPPC64()
-             ? (Subtarget.hasAltivec()
-                    ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
-                              : CSR_SVR464_Altivec_SaveList)
-                    : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
-             : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
-                                       : CSR_SVR432_SaveList);
+  // Standard calling convention CSRs.
+  if (TM.isPPC64()) {
+    if (Subtarget.hasAltivec())
+      return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+                    : CSR_SVR464_Altivec_SaveList;
+    return SaveR2 ? CSR_SVR464_R2_SaveList
+                  : CSR_SVR464_SaveList;
+  }
+  // 32-bit targets.
+  if (Subtarget.hasAltivec())
+    return CSR_SVR432_Altivec_SaveList;
+  else if (Subtarget.hasSPE())
+    return CSR_SVR432_SPE_SaveList;
+  return CSR_SVR432_SaveList;
 }
 
 const MCPhysReg *
@@ -236,13 +245,17 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
     return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
                                                   : CSR_SVR64_ColdCC_RegMask)
                         : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
-                                                  : CSR_SVR32_ColdCC_RegMask);
+                                                  : (Subtarget.hasSPE()
+                                                  ? CSR_SVR32_ColdCC_SPE_RegMask
+                                                  : CSR_SVR32_ColdCC_RegMask));
   }
 
   return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
                                                 : CSR_SVR464_RegMask)
                       : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
-                                                : CSR_SVR432_RegMask);
+                                                : (Subtarget.hasSPE()
+                                                  ? CSR_SVR432_SPE_RegMask
+                                                  : CSR_SVR432_RegMask));
 }
 
 const uint32_t*

From 6011a285edf2cf45ee6d660b6219abe3db9e7dc1 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Wed, 17 Jul 2019 13:14:02 +0000
Subject: [PATCH 338/451] [clangd] Handle windows line endings in QueryDriver

Summary: fixes second case of https://github.com/clangd/clangd/issues/93

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64789

llvm-svn: 366320
---
 clang-tools-extra/clangd/QueryDriverDatabase.cpp            | 4 +++-
 clang-tools-extra/clangd/test/system-include-extractor.test | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/QueryDriverDatabase.cpp
index d71ee3184cf5f..110b8fc43fc2b 100644
--- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp
+++ b/clang-tools-extra/clangd/QueryDriverDatabase.cpp
@@ -63,7 +63,9 @@ std::vector<std::string> parseDriverOutput(llvm::StringRef Output) {
   llvm::SmallVector<llvm::StringRef, 8> Lines;
   Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
 
-  auto StartIt = std::find(Lines.begin(), Lines.end(), SIS);
+  auto StartIt =
+      std::find_if(Lines.begin(), Lines.end(),
+                   [](llvm::StringRef Line) { return Line.trim() == SIS; });
   if (StartIt == Lines.end()) {
     elog("System include extraction: start marker not found: {0}", Output);
     return {};
diff --git a/clang-tools-extra/clangd/test/system-include-extractor.test b/clang-tools-extra/clangd/test/system-include-extractor.test
index 130afbd828c21..8f99db9122a6d 100644
--- a/clang-tools-extra/clangd/test/system-include-extractor.test
+++ b/clang-tools-extra/clangd/test/system-include-extractor.test
@@ -5,7 +5,7 @@
 # RUN: echo '#!/bin/bash' >> %t.dir/my_driver.sh
 # RUN: echo '[ "$0" = "%t.dir/my_driver.sh" ] || exit' >> %t.dir/my_driver.sh
 # RUN: echo 'echo line to ignore >&2' >> %t.dir/my_driver.sh
-# RUN: echo 'echo \#include \<...\> search starts here: >&2' >> %t.dir/my_driver.sh
+# RUN: echo 'echo -e "#include <...> search starts here:\r" >&2' >> %t.dir/my_driver.sh
 # RUN: echo 'echo %t.dir/my/dir/ >&2' >> %t.dir/my_driver.sh
 # RUN: echo 'echo %t.dir/my/dir2/ >&2' >> %t.dir/my_driver.sh
 # RUN: echo 'echo End of search list. >&2' >> %t.dir/my_driver.sh

From 2889fe67691b4220e31bbf8c78b63474c973f26c Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Wed, 17 Jul 2019 13:21:25 +0000
Subject: [PATCH 339/451] [clangd] Force the required interpretation of #import
 on windows tests.

Summary: NFC but should fix a bunch of tests.

Reviewers: kadircet

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64857

llvm-svn: 366321
---
 clang-tools-extra/clangd/unittests/TestTU.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp
index 4747f5ea2e3d6..7e77160870663 100644
--- a/clang-tools-extra/clangd/unittests/TestTU.cpp
+++ b/clang-tools-extra/clangd/unittests/TestTU.cpp
@@ -38,6 +38,10 @@ ParsedAST TestTU::build() const {
     Cmd.push_back("-include");
     Cmd.push_back(ImplicitHeaderGuard ? ImportThunk.c_str()
                                       : FullHeaderName.c_str());
+    // ms-compatibility changes the meaning of #import.
+    // The default is OS-dependent (enabled on Windows), ensure it's off.
+    if (ImplicitHeaderGuard)
+      Cmd.push_back("-fno-ms-compatibility");
   }
   Cmd.insert(Cmd.end(), ExtraArgs.begin(), ExtraArgs.end());
   // Put the file name at the end -- this allows the extra arg (-xc++) to

From 4b8da3a503e434ddbc08ecf66582475765f449bc Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov@arm.com>
Date: Wed, 17 Jul 2019 13:23:27 +0000
Subject: [PATCH 340/451] [AArch64] Add support for Transactional Memory
 Extension (TME)

TME is a future architecture technology, documented in

https://developer.arm.com/architectures/cpu-architecture/a-profile/exploration-tools
https://developer.arm.com/docs/ddi0601/a

More about the future architectures:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/new-technologies-for-the-arm-a-profile-architecture

This patch adds support for the TME instructions TSTART, TTEST, TCOMMIT, and
TCANCEL and the target feature/arch extension "tme".

It also implements TME builtin functions, defined in ACLE Q2 2019
(https://developer.arm.com/docs/101028/latest)

Patch by Javed Absar and Momchil Velikov

Differential Revision: https://reviews.llvm.org/D64416

llvm-svn: 366322
---
 clang/include/clang/Basic/BuiltinsAArch64.def |  6 ++
 clang/lib/Basic/Targets/AArch64.cpp           |  6 ++
 clang/lib/Basic/Targets/AArch64.h             |  1 +
 clang/lib/Headers/arm_acle.h                  | 24 +++++++-
 clang/lib/Sema/SemaChecking.cpp               |  1 +
 .../test/CodeGen/aarch64-tme-tcancel-arg.cpp  | 10 ++++
 clang/test/CodeGen/aarch64-tme.c              | 36 ++++++++++++
 clang/test/Sema/aarch64-tme-errors.c          |  8 +++
 .../Sema/aarch64-tme-tcancel-const-error.c    |  4 ++
 .../Sema/aarch64-tme-tcancel-range-error.c    |  4 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     | 17 ++++++
 .../llvm/Support/AArch64TargetParser.def      |  1 +
 .../llvm/Support/AArch64TargetParser.h        |  1 +
 llvm/lib/Target/AArch64/AArch64.td            |  3 +
 .../lib/Target/AArch64/AArch64InstrFormats.td | 55 +++++++++++++++++--
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 29 ++++++++--
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |  2 +
 llvm/test/CodeGen/AArch64/tme-tcancel.ll      | 16 ++++++
 llvm/test/CodeGen/AArch64/tme-tcommit.ll      | 16 ++++++
 llvm/test/CodeGen/AArch64/tme-tstart.ll       | 16 ++++++
 llvm/test/CodeGen/AArch64/tme-ttest.ll        | 16 ++++++
 llvm/test/MC/AArch64/tme-error.s              | 47 ++++++++++++++++
 llvm/test/MC/AArch64/tme.s                    | 24 ++++++++
 llvm/test/MC/Disassembler/AArch64/tme.txt     | 19 +++++++
 llvm/unittests/Support/TargetParserTest.cpp   |  1 +
 25 files changed, 350 insertions(+), 13 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp
 create mode 100644 clang/test/CodeGen/aarch64-tme.c
 create mode 100644 clang/test/Sema/aarch64-tme-errors.c
 create mode 100644 clang/test/Sema/aarch64-tme-tcancel-const-error.c
 create mode 100644 clang/test/Sema/aarch64-tme-tcancel-range-error.c
 create mode 100644 llvm/test/CodeGen/AArch64/tme-tcancel.ll
 create mode 100644 llvm/test/CodeGen/AArch64/tme-tcommit.ll
 create mode 100644 llvm/test/CodeGen/AArch64/tme-tstart.ll
 create mode 100644 llvm/test/CodeGen/AArch64/tme-ttest.ll
 create mode 100644 llvm/test/MC/AArch64/tme-error.s
 create mode 100644 llvm/test/MC/AArch64/tme.s
 create mode 100644 llvm/test/MC/Disassembler/AArch64/tme.txt

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 7701ad98f4832..a144979acca6d 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -91,6 +91,12 @@ LANGBUILTIN(__sevl,  "v", "",   ALL_MS_LANGUAGES)
 // Misc
 BUILTIN(__builtin_sponentry, "v*", "c")
 
+// Transactional Memory Extension
+BUILTIN(__builtin_arm_tstart, "WUi", "nj")
+BUILTIN(__builtin_arm_tcommit, "v", "n")
+BUILTIN(__builtin_arm_tcancel, "vWUIi", "nr")
+BUILTIN(__builtin_arm_ttest, "WUi", "nc")
+
 TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 74ac69ab8946a..2abca0a660ae4 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -219,6 +219,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasMTE)
     Builder.defineMacro("__ARM_FEATURE_MEMORY_TAGGING", "1");
 
+  if (HasTME)
+    Builder.defineMacro("__ARM_FEATURE_TME", "1");
+
   if ((FPU & NeonMode) && HasFP16FML)
     Builder.defineMacro("__ARM_FEATURE_FP16FML", "1");
 
@@ -270,6 +273,7 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
   HasDotProd = false;
   HasFP16FML = false;
   HasMTE = false;
+  HasTME = false;
   ArchKind = llvm::AArch64::ArchKind::ARMV8A;
 
   for (const auto &Feature : Features) {
@@ -301,6 +305,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasFP16FML = true;
     if (Feature == "+mte")
       HasMTE = true;
+    if (Feature == "+tme")
+      HasTME = true;
   }
 
   setDataLayout();
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 5833c146003b0..b6aa07780edda 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -35,6 +35,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasDotProd;
   bool HasFP16FML;
   bool HasMTE;
+  bool HasTME;
 
   llvm::AArch64::ArchKind ArchKind;
 
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 096cc261af2c6..0510e6fd809f2 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -613,7 +613,7 @@ __jcvt(double __a) {
 #define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
 #define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
 
-// Memory Tagging Extensions (MTE) Intrinsics
+/* Memory Tagging Extensions (MTE) Intrinsics */
 #if __ARM_FEATURE_MEMORY_TAGGING
 #define __arm_mte_create_random_tag(__ptr, __mask)  __builtin_arm_irg(__ptr, __mask)
 #define __arm_mte_increment_tag(__ptr, __tag_offset)  __builtin_arm_addg(__ptr, __tag_offset)
@@ -623,6 +623,28 @@ __jcvt(double __a) {
 #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
 #endif
 
+/* Transactional Memory Extension (TME) Intrinsics */
+#if __ARM_FEATURE_TME
+
+#define _TMFAILURE_REASON  0x00007fffu
+#define _TMFAILURE_RTRY    0x00008000u
+#define _TMFAILURE_CNCL    0x00010000u
+#define _TMFAILURE_MEM     0x00020000u
+#define _TMFAILURE_IMP     0x00040000u
+#define _TMFAILURE_ERR     0x00080000u
+#define _TMFAILURE_SIZE    0x00100000u
+#define _TMFAILURE_NEST    0x00200000u
+#define _TMFAILURE_DBG     0x00400000u
+#define _TMFAILURE_INT     0x00800000u
+#define _TMFAILURE_TRIVIAL 0x01000000u
+
+#define __tstart()        __builtin_arm_tstart()
+#define __tcommit()       __builtin_arm_tcommit()
+#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
+#define __ttest()         __builtin_arm_ttest()
+
+#endif /* __ARM_FEATURE_TME */
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f9f82cdeef432..442cbcf1429b6 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1928,6 +1928,7 @@ bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
   case AArch64::BI__builtin_arm_dmb:
   case AArch64::BI__builtin_arm_dsb:
   case AArch64::BI__builtin_arm_isb: l = 0; u = 15; break;
+  case AArch64::BI__builtin_arm_tcancel: l = 0; u = 65535; break;
   }
 
   return SemaBuiltinConstantArgRange(TheCall, i, l, u + l);
diff --git a/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp b/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp
new file mode 100644
index 0000000000000..ae6694703c838
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s
+
+#define A -1
+constexpr int f() { return 65536; }
+
+void t_cancel() {
+	__builtin_arm_tcancel(f() + A);
+}
+
+// CHECK: call void @llvm.aarch64.tcancel(i64 65535)
diff --git a/clang/test/CodeGen/aarch64-tme.c b/clang/test/CodeGen/aarch64-tme.c
new file mode 100644
index 0000000000000..8f90fb8eb8297
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-tme.c
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -DUSE_ACLE  -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s
+
+#ifdef USE_ACLE
+#include "arm_acle.h"
+void test_tme_funcs() {
+  __tstart();
+  (void)__ttest();
+  __tcommit();
+  __tcancel(0x789a);
+}
+#else
+void test_tme_funcs() {
+  __builtin_arm_tstart();
+  (void)__builtin_arm_ttest();
+  __builtin_arm_tcommit();
+  __builtin_arm_tcancel(0x789a);
+}
+#endif
+// CHECK: call i64 @llvm.aarch64.tstart()
+// CHECK: call i64 @llvm.aarch64.ttest()
+// CHECK: call void @llvm.aarch64.tcommit()
+// CHECK: call void @llvm.aarch64.tcancel(i64 30874)
+
+// CHECK: declare i64 @llvm.aarch64.tstart() #1
+// CHECK: declare i64 @llvm.aarch64.ttest() #1
+// CHECK: declare void @llvm.aarch64.tcommit() #1
+// CHECK: declare void @llvm.aarch64.tcancel(i64 immarg) #2
+
+#ifdef __ARM_FEATURE_TME
+void arm_feature_tme_defined() {}
+#endif
+// CHECK: define void @arm_feature_tme_defined()
+
+// CHECK: attributes #1 = { nounwind }
+// CHECK: attributes #2 = { noreturn nounwind }
diff --git a/clang/test/Sema/aarch64-tme-errors.c b/clang/test/Sema/aarch64-tme-errors.c
new file mode 100644
index 0000000000000..0e9c2a6beec0c
--- /dev/null
+++ b/clang/test/Sema/aarch64-tme-errors.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple aarch64-eabi -verify %s
+
+#include "arm_acle.h"
+
+void test_no_tme_funcs() {
+  __tstart();         // expected-warning{{implicit declaration of function '__tstart'}}
+  __builtin_tstart(); // expected-error{{use of unknown builtin '__builtin_tstart'}}
+}
diff --git a/clang/test/Sema/aarch64-tme-tcancel-const-error.c b/clang/test/Sema/aarch64-tme-tcancel-const-error.c
new file mode 100644
index 0000000000000..f97ece59b660b
--- /dev/null
+++ b/clang/test/Sema/aarch64-tme-tcancel-const-error.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -verify %s
+void t_cancel(unsigned short u) {
+  __builtin_arm_tcancel(u); // expected-error{{argument to '__builtin_arm_tcancel' must be a constant integer}}
+}
diff --git a/clang/test/Sema/aarch64-tme-tcancel-range-error.c b/clang/test/Sema/aarch64-tme-tcancel-range-error.c
new file mode 100644
index 0000000000000..c61ec90dfa501
--- /dev/null
+++ b/clang/test/Sema/aarch64-tme-tcancel-range-error.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -verify %s
+void t_cancel() {
+  __builtin_arm_tcancel(0x12345u); // expected-error{{argument value 74565 is outside the valid range [0, 65535]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7616d6a90c1bc..ceec212b66303 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -703,3 +703,20 @@ def int_aarch64_stg   : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
 def int_aarch64_subp :  Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
     [IntrNoMem]>;
 }
+
+// Transactional Memory Extension (TME) Intrinsics
+let TargetPrefix = "aarch64" in {
+def int_aarch64_tstart  : GCCBuiltin<"__builtin_arm_tstart">,
+                         Intrinsic<[llvm_i64_ty]>;
+
+def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>;
+
+def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">,
+                          Intrinsic<[], [llvm_i64_ty],
+                                    [ImmArg<0>, IntrNoMem, IntrHasSideEffects,
+                                     IntrNoReturn]>;
+
+def int_aarch64_ttest   : GCCBuiltin<"__builtin_arm_ttest">,
+                          Intrinsic<[llvm_i64_ty], [],
+                                    [IntrNoMem, IntrHasSideEffects]>;
+}
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index e152f383b3ec0..fd21e3615b71e 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -79,6 +79,7 @@ AARCH64_ARCH_EXT_NAME("memtag",    AArch64::AEK_MTE,      "+mte",   "-mte")
 AARCH64_ARCH_EXT_NAME("ssbs",      AArch64::AEK_SSBS,     "+ssbs",  "-ssbs")
 AARCH64_ARCH_EXT_NAME("sb",        AArch64::AEK_SB,       "+sb",    "-sb")
 AARCH64_ARCH_EXT_NAME("predres",   AArch64::AEK_PREDRES,  "+predres", "-predres")
+AARCH64_ARCH_EXT_NAME("tme",       AArch64::AEK_TME,      "+tme",   "-tme")
 #undef AARCH64_ARCH_EXT_NAME
 
 #ifndef AARCH64_CPU_NAME
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h
index 965d38535e747..564f831b07069 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.h
+++ b/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -54,6 +54,7 @@ enum ArchExtKind : unsigned {
   AEK_SVE2SM4 =     1 << 25,
   AEK_SVE2SHA3 =    1 << 26,
   AEK_BITPERM =     1 << 27,
+  AEK_TME =         1 << 28,
 };
 
 enum class ArchKind {
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index e39c6995e3673..fcd5818727f12 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -345,6 +345,9 @@ def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen",
 def FeatureMTE : SubtargetFeature<"mte", "HasMTE",
     "true", "Enable Memory Tagging Extension" >;
 
+def FeatureTME : SubtargetFeature<"tme", "HasTME",
+    "true", "Enable Transactional Memory Extension" >;
+
 //===----------------------------------------------------------------------===//
 // Architectures.
 //
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 74fa5ef713d9e..2af5726fc4f5e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -714,12 +714,15 @@ def logical_imm64_not : Operand<i64> {
   let ParserMatchClass = LogicalImm64NotOperand;
 }
 
-// imm0_65535 predicate - True if the immediate is in the range [0,65535].
-def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+// iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
+let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
+def i32_imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
   return ((uint32_t)Imm) < 65536;
-}]> {
-  let ParserMatchClass = AsmImmRange<0, 65535>;
-  let PrintMethod = "printImmHex";
+}]>;
+
+def i64_imm0_65535 : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 65536;
+}]>;
 }
 
 // imm0_255 predicate - True if the immediate is in the range [0,255].
@@ -1082,6 +1085,46 @@ class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
   let Inst{4-0} = Rt;
 }
 
+// System instructions for transactional memory extension
+class TMBaseSystemI<bit L, bits<4> CRm, bits<3> op2, dag oops, dag iops,
+                    string asm, string operands, list<dag> pattern>
+    : BaseSystemI<L, oops, iops, asm, operands, pattern>,
+      Sched<[WriteSys]> {
+  let Inst{20-12} = 0b000110011;
+  let Inst{11-8} = CRm;
+  let Inst{7-5} = op2;
+  let DecoderMethod = "";
+
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+// System instructions for transactional memory - single output register operand
+class TMSystemI<bits<4> CRm, string asm, list<dag> pattern>
+    : TMBaseSystemI<0b1, CRm, 0b011,
+                    (outs GPR64:$Rt), (ins), asm, "\t$Rt", pattern> {
+  bits<5> Rt;
+  let Inst{4-0} = Rt;
+}
+
+// System instructions for transactional memory - no operand
+class TMSystemINoOperand<bits<4> CRm, string asm, list<dag> pattern>
+    : TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> {
+  let Inst{4-0} = 0b11111;
+}
+
+// System instructions for exit from transactions
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class TMSystemException<bits<3> op1, string asm, list<dag> pattern>
+    : I<(outs), (ins i64_imm0_65535:$imm), asm, "\t$imm", "", pattern>,
+      Sched<[WriteSys]> {
+  bits<16> imm;
+  let Inst{31-24} = 0b11010100;
+  let Inst{23-21} = op1;
+  let Inst{20-5}  = imm;
+  let Inst{4-0}   = 0b00000;
+}
+
 // Hint instructions that take both a CRm and a 3-bit immediate.
 // NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
 // model patterns with sufficiently fine granularity
@@ -4086,7 +4129,7 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
 
 let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
 class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
-    : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>,
+    : I<(outs), (ins i32_imm0_65535:$imm), asm, "\t$imm", "", []>,
       Sched<[WriteSys]> {
   bits<16> imm;
   let Inst{31-24} = 0b11010100;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 897b3ebb3847f..74f07f569a518 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -133,6 +133,8 @@ def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                        AssemblerPredicate<"FeatureBranchTargetId", "bti">;
 def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                        AssemblerPredicate<"FeatureMTE", "mte">;
+def HasTME           : Predicate<"Subtarget->hasTME()">,
+                       AssemblerPredicate<"FeatureTME", "tme">;
 def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
 def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
 def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
@@ -798,6 +800,21 @@ def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                 (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                  sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
 
+
+let Predicates = [HasTME] in {
+
+def TSTART : TMSystemI<0b0000, "tstart", [(set GPR64:$Rt, (int_aarch64_tstart))]>;
+
+def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
+
+let mayLoad = 0, mayStore = 0 in {
+def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]>;
+def TCANCEL : TMSystemException<0b011, "tcancel", [(int_aarch64_tcancel i64_imm0_65535:$imm)]> {
+  let isBarrier = 1;
+}
+}
+} // HasTME
+
 //===----------------------------------------------------------------------===//
 // Move immediate instructions.
 //===----------------------------------------------------------------------===//
@@ -809,12 +826,12 @@ let PostEncoderMethod = "fixMOVZ" in
 defm MOVZ : MoveImmediate<0b10, "movz">;
 
 // First group of aliases covers an implicit "lsl #0".
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
 
 // Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
 def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 0c84cfb8329a6..ce829795309c2 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -134,6 +134,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   bool HasBTI = false;
   bool HasRandGen = false;
   bool HasMTE = false;
+  bool HasTME = false;
 
   // Arm SVE2 extensions
   bool HasSVE2AES = false;
@@ -380,6 +381,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   bool hasBTI() const { return HasBTI; }
   bool hasRandGen() const { return HasRandGen; }
   bool hasMTE() const { return HasMTE; }
+  bool hasTME() const { return HasTME; }
   // Arm SVE2 extensions
   bool hasSVE2AES() const { return HasSVE2AES; }
   bool hasSVE2SM4() const { return HasSVE2SM4; }
diff --git a/llvm/test/CodeGen/AArch64/tme-tcancel.ll b/llvm/test/CodeGen/AArch64/tme-tcancel.ll
new file mode 100644
index 0000000000000..f4fb7b665de16
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/tme-tcancel.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target triple = "aarch64-unknown-unknown-eabi"
+
+define void @test_tcancel() #0 {
+  tail call void @llvm.aarch64.tcancel(i64 0) #1
+  unreachable
+}
+
+declare void @llvm.aarch64.tcancel(i64 immarg) #1
+
+attributes #0 = { "target-features"="+tme" }
+attributes #1 = { nounwind noreturn }
+
+; CHECK-LABEL: test_tcancel
+; CHECK: tcancel
diff --git a/llvm/test/CodeGen/AArch64/tme-tcommit.ll b/llvm/test/CodeGen/AArch64/tme-tcommit.ll
new file mode 100644
index 0000000000000..cd85a3e5bd8ec
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/tme-tcommit.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target triple = "aarch64-unknown-unknown-eabi"
+
+define void @test_tcommit() #0 {
+  tail call void @llvm.aarch64.tcommit()
+  ret void
+}
+
+declare void @llvm.aarch64.tcommit() #1
+
+attributes #0 = { "target-features"="+tme" }
+attributes #1 = { nounwind }
+
+; CHECK-LABEL: test_tcommit
+; CHECK: tcommit
diff --git a/llvm/test/CodeGen/AArch64/tme-tstart.ll b/llvm/test/CodeGen/AArch64/tme-tstart.ll
new file mode 100644
index 0000000000000..c761842e7980d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/tme-tstart.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target triple = "aarch64-unknown-unknown-eabi"
+
+define i64 @test_tstart() #0 {
+  %r = tail call i64 @llvm.aarch64.tstart()
+  ret i64 %r
+}
+
+declare i64 @llvm.aarch64.tstart() #1
+
+attributes #0 = { "target-features"="+tme" }
+attributes #1 = { nounwind }
+
+; CHECK-LABEL: test_tstart
+; CHECK: tstart x
diff --git a/llvm/test/CodeGen/AArch64/tme-ttest.ll b/llvm/test/CodeGen/AArch64/tme-ttest.ll
new file mode 100644
index 0000000000000..597821081354f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/tme-ttest.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target triple = "aarch64-unknown-unknown-eabi"
+
+define i64 @test_ttest() #0 {
+  %r = tail call i64 @llvm.aarch64.ttest()
+  ret i64 %r
+}
+
+declare i64 @llvm.aarch64.ttest() #1
+
+attributes #0 = { "target-features"="+tme" }
+attributes #1 = { nounwind }
+
+; CHECK-LABEL: test_ttest
+; CHECK: ttest x
diff --git a/llvm/test/MC/AArch64/tme-error.s b/llvm/test/MC/AArch64/tme-error.s
new file mode 100644
index 0000000000000..f91f58fa3ef25
--- /dev/null
+++ b/llvm/test/MC/AArch64/tme-error.s
@@ -0,0 +1,47 @@
+// Tests for transactional memory extension instructions
+// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+tme < %s 2>&1   | FileCheck %s
+
+tstart
+// CHECK: error: too few operands for instruction
+// CHECK-NEXT: tstart
+tstart  x4, x5
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: tstart x4, x5
+tstart  x4, #1
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: tstart x4, #1
+tstart  sp
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: tstart sp
+
+ttest
+// CHECK: error: too few operands for instruction
+// CHECK-NEXT: ttest
+ttest  x4, x5
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: ttest x4, x5
+ttest  x4, #1
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: ttest x4, #1
+ttest  sp
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: ttest sp
+
+tcommit  x4
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: tcommit x4
+tcommit  sp
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: tcommit sp
+
+
+tcancel
+// CHECK: error: too few operands for instruction
+// CHECK-NEXT: tcancel
+tcancel x0
+// CHECK: error: immediate must be an integer in range [0, 65535]
+// CHECK-NEXT: tcancel x0
+tcancel #65536
+// CHECK: error: immediate must be an integer in range [0, 65535]
+// CHECK-NEXT: tcancel #65536
+
diff --git a/llvm/test/MC/AArch64/tme.s b/llvm/test/MC/AArch64/tme.s
new file mode 100644
index 0000000000000..cd47274127649
--- /dev/null
+++ b/llvm/test/MC/AArch64/tme.s
@@ -0,0 +1,24 @@
+// Tests for transactional memory extension instructions
+//
+// RUN:     llvm-mc -triple aarch64 -show-encoding -mattr=+tme   < %s      | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=-tme   < %s 2>&1 | FileCheck %s --check-prefix=NOTME
+
+tstart x3
+ttest  x4
+tcommit
+tcancel #0x1234
+
+// CHECK: tstart x3         // encoding: [0x63,0x30,0x23,0xd5]
+// CHECK: ttest x4          // encoding: [0x64,0x31,0x23,0xd5]
+// CHECK: tcommit           // encoding: [0x7f,0x30,0x03,0xd5]
+// CHECK: tcancel #0x1234   // encoding: [0x80,0x46,0x62,0xd4]
+
+
+// NOTME: instruction requires: tme
+// NOTME-NEXT: tstart x3
+// NOTME: instruction requires: tme
+// NOTME-NEXT: ttest  x4
+// NOTME: instruction requires: tme
+// NOTME-NEXT: tcommit
+// NOTME: instruction requires: tme
+// NOTME-NEXT: tcancel #0x1234
diff --git a/llvm/test/MC/Disassembler/AArch64/tme.txt b/llvm/test/MC/Disassembler/AArch64/tme.txt
new file mode 100644
index 0000000000000..f250b33e0e1df
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AArch64/tme.txt
@@ -0,0 +1,19 @@
+# Tests for transactional memory extension instructions
+# RUN:     llvm-mc -triple=aarch64 -mattr=+tme   -disassemble < %s      | FileCheck %s
+# RUN: not llvm-mc -triple=aarch64 -mattr=-tme   -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOTME
+
+[0x63,0x30,0x23,0xd5]
+[0x64,0x31,0x23,0xd5]
+[0x7f,0x30,0x03,0xd5]
+[0x80,0x46,0x62,0xd4]
+
+# CHECK: tstart x3
+# CHECK: ttest  x4
+# CHECK: tcommit
+# CHECK: tcancel #0x1234
+
+# NOTEME: mrs
+# NOTEME-NEXT: mrs
+# NOTEME-NEXT: msr
+# NOTME:      warning: invalid instruction encoding
+# NOTME-NEXT: [0x80,0x46,0x62,0xd4]
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index 34c7a8a4fd1c8..5ef8f2e4b5500 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -1119,6 +1119,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
                               {"rcpc", "norcpc", "+rcpc", "-rcpc" },
                               {"rng", "norng", "+rand", "-rand"},
                               {"memtag", "nomemtag", "+mte", "-mte"},
+                              {"tme", "notme", "+tme", "-tme"},
                               {"ssbs", "nossbs", "+ssbs", "-ssbs"},
                               {"sb", "nosb", "+sb", "-sb"},
                               {"predres", "nopredres", "+predres", "-predres"}

From 70235c642e66bdf4900aabd541fa9a1548f72d0e Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@gmail.com>
Date: Wed, 17 Jul 2019 13:40:03 +0000
Subject: [PATCH 341/451] [AMDGPU] Optimize atomic AND/OR/XOR

Summary: Extend the atomic optimizer to handle AND, OR and XOR.

Reviewers: arsenm, sheredom

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, jfb, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64809

llvm-svn: 366323
---
 .../Target/AMDGPU/AMDGPUAtomicOptimizer.cpp   | 71 ++++++++++++++-----
 .../atomic_optimizations_local_pointer.ll     | 36 ++++++++++
 2 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index c65a49b7c5bc7..8a92e7d923fbc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -127,6 +127,9 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
     return;
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
+  case AtomicRMWInst::And:
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::Xor:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
   case AtomicRMWInst::UMax:
@@ -177,6 +180,21 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
   case Intrinsic::amdgcn_raw_buffer_atomic_sub:
     Op = AtomicRMWInst::Sub;
     break;
+  case Intrinsic::amdgcn_buffer_atomic_and:
+  case Intrinsic::amdgcn_struct_buffer_atomic_and:
+  case Intrinsic::amdgcn_raw_buffer_atomic_and:
+    Op = AtomicRMWInst::And;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_or:
+  case Intrinsic::amdgcn_struct_buffer_atomic_or:
+  case Intrinsic::amdgcn_raw_buffer_atomic_or:
+    Op = AtomicRMWInst::Or;
+    break;
+  case Intrinsic::amdgcn_buffer_atomic_xor:
+  case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+  case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+    Op = AtomicRMWInst::Xor;
+    break;
   case Intrinsic::amdgcn_buffer_atomic_smin:
   case Intrinsic::amdgcn_struct_buffer_atomic_smin:
   case Intrinsic::amdgcn_raw_buffer_atomic_smin:
@@ -240,6 +258,12 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
     return B.CreateBinOp(Instruction::Add, LHS, RHS);
   case AtomicRMWInst::Sub:
     return B.CreateBinOp(Instruction::Sub, LHS, RHS);
+  case AtomicRMWInst::And:
+    return B.CreateBinOp(Instruction::And, LHS, RHS);
+  case AtomicRMWInst::Or:
+    return B.CreateBinOp(Instruction::Or, LHS, RHS);
+  case AtomicRMWInst::Xor:
+    return B.CreateBinOp(Instruction::Xor, LHS, RHS);
 
   case AtomicRMWInst::Max:
     Pred = CmpInst::ICMP_SGT;
@@ -265,8 +289,11 @@ static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
     llvm_unreachable("Unhandled atomic op");
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::Xor:
   case AtomicRMWInst::UMax:
     return APInt::getMinValue(BitWidth);
+  case AtomicRMWInst::And:
   case AtomicRMWInst::UMin:
     return APInt::getMaxValue(BitWidth);
   case AtomicRMWInst::Max:
@@ -331,10 +358,10 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1));
   CallInst *const PartialMbcnt = B.CreateIntrinsic(
       Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)});
-  CallInst *const Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
-                                            {ExtractHi, PartialMbcnt});
-
-  Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false);
+  Value *const Mbcnt =
+      B.CreateIntCast(B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {},
+                                        {ExtractHi, PartialMbcnt}),
+                      Ty, false);
 
   Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth));
 
@@ -408,32 +435,39 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
 
     case AtomicRMWInst::Add:
     case AtomicRMWInst::Sub: {
-      // Get the total number of active lanes we have by using popcount.
-      Instruction *const Ctpop =
-          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot);
-      Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false);
-
-      // Calculate the new value we will be contributing to the atomic operation
-      // for the entire wavefront.
-      NewV = B.CreateMul(V, CtpopCast);
+      // The new value we will be contributing to the atomic operation is the
+      // old value times the number of active lanes.
+      Value *const Ctpop = B.CreateIntCast(
+          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
+      NewV = B.CreateMul(V, Ctpop);
       break;
     }
 
+    case AtomicRMWInst::And:
+    case AtomicRMWInst::Or:
     case AtomicRMWInst::Max:
     case AtomicRMWInst::Min:
     case AtomicRMWInst::UMax:
     case AtomicRMWInst::UMin:
-      // Max/min with a uniform value is idempotent: doing the atomic operation
-      // multiple times has the same effect as doing it once.
+      // These operations with a uniform value are idempotent: doing the atomic
+      // operation multiple times has the same effect as doing it once.
       NewV = V;
       break;
+
+    case AtomicRMWInst::Xor:
+      // The new value we will be contributing to the atomic operation is the
+      // old value times the parity of the number of active lanes.
+      Value *const Ctpop = B.CreateIntCast(
+          B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false);
+      NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1));
+      break;
     }
   }
 
   // We only want a single lane to enter our new control flow, and we do this
   // by checking if there are any active lanes below us. Only one lane will
   // have 0 active lanes below us, so that will be the only one to progress.
-  Value *const Cond = B.CreateICmpEQ(MbcntCast, B.getIntN(TyBitWidth, 0));
+  Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getIntN(TyBitWidth, 0));
 
   // Store I's original basic block before we split the block.
   BasicBlock *const EntryBB = I.getParent();
@@ -502,14 +536,19 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
       llvm_unreachable("Unhandled atomic op");
     case AtomicRMWInst::Add:
     case AtomicRMWInst::Sub:
-      LaneOffset = B.CreateMul(V, MbcntCast);
+      LaneOffset = B.CreateMul(V, Mbcnt);
       break;
+    case AtomicRMWInst::And:
+    case AtomicRMWInst::Or:
     case AtomicRMWInst::Max:
     case AtomicRMWInst::Min:
     case AtomicRMWInst::UMax:
     case AtomicRMWInst::UMin:
       LaneOffset = B.CreateSelect(Cond, Identity, V);
       break;
+    case AtomicRMWInst::Xor:
+      LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1));
+      break;
     }
   }
   Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset);
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 5f7649c1c0ea5..05620789141cf 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -195,6 +195,42 @@ entry:
   ret void
 }
 
+; GCN-LABEL: and_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw and i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: or_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw or i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: xor_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw xor i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: max_i32_varying:
 ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]

From 62069ac310654f464e65d98e92d33d16faa738ee Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Wed, 17 Jul 2019 13:40:42 +0000
Subject: [PATCH 342/451] [llvm-ar][test] \r\n -> \n

Also simplify some empty output tests with 'count 0'

llvm-svn: 366324
---
 llvm/test/tools/llvm-ar/create.test           |  27 +-
 .../tools/llvm-ar/dash-before-letter.test     |  24 +-
 llvm/test/tools/llvm-ar/delete.test           | 134 +++----
 llvm/test/tools/llvm-ar/insert-after.test     | 118 +++---
 llvm/test/tools/llvm-ar/insert-before.test    | 118 +++---
 llvm/test/tools/llvm-ar/move-after.test       | 118 +++---
 llvm/test/tools/llvm-ar/move-before.test      | 138 +++----
 llvm/test/tools/llvm-ar/move.test             | 258 ++++++------
 llvm/test/tools/llvm-ar/mri-addlib.test       |  90 ++---
 llvm/test/tools/llvm-ar/mri-addmod.test       |  54 +--
 llvm/test/tools/llvm-ar/mri-comments.test     |  38 +-
 llvm/test/tools/llvm-ar/mri-end.test          |  96 ++---
 llvm/test/tools/llvm-ar/quick-append.test     | 184 +++++----
 .../test/tools/llvm-ar/read-only-archive.test |  60 +--
 llvm/test/tools/llvm-ar/replace-update.test   | 378 +++++++++---------
 llvm/test/tools/llvm-ar/replace.test          | 356 ++++++++---------
 llvm/test/tools/llvm-ar/symtab.test           | 140 +++----
 17 files changed, 1164 insertions(+), 1167 deletions(-)

diff --git a/llvm/test/tools/llvm-ar/create.test b/llvm/test/tools/llvm-ar/create.test
index 7e4951da5fa2b..a29b93ec5d6da 100644
--- a/llvm/test/tools/llvm-ar/create.test
+++ b/llvm/test/tools/llvm-ar/create.test
@@ -1,14 +1,13 @@
-## Test the creation warning and supression of that warning.
-
-RUN: touch %t1.txt
-RUN: touch %t2.txt
-
-RUN: rm -f %t.warning.ar
-RUN: llvm-ar r %t.warning.ar %t1.txt %t2.txt 2>&1 \
-RUN:   | FileCheck %s -DOUTPUT=%t.warning.ar
-
-CHECK: warning: creating [[OUTPUT]]
-
-RUN: rm -f %t.supressed.ar
-RUN: llvm-ar cr %t.supressed.ar %t1.txt %t2.txt 2>&1 \
-RUN:   | FileCheck --allow-empty /dev/null --implicit-check-not={{.}}
+## Test the creation warning and suppression of that warning.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+
+RUN: rm -f %t.warning.ar
+RUN: llvm-ar r %t.warning.ar %t1.txt %t2.txt 2>&1 \
+RUN:   | FileCheck %s -DOUTPUT=%t.warning.ar
+
+CHECK: warning: creating [[OUTPUT]]
+
+RUN: rm -f %t.supressed.ar
+RUN: llvm-ar cr %t.supressed.ar %t1.txt %t2.txt 2>&1 | count 0
diff --git a/llvm/test/tools/llvm-ar/dash-before-letter.test b/llvm/test/tools/llvm-ar/dash-before-letter.test
index 3002a6de7ef90..43930b1217367 100644
--- a/llvm/test/tools/llvm-ar/dash-before-letter.test
+++ b/llvm/test/tools/llvm-ar/dash-before-letter.test
@@ -1,12 +1,12 @@
-# Test the use of dash before key letters.
-
-RUN: touch %t1.txt
-RUN: touch %t2.txt
-
-RUN: rm -f %t.ar
-RUN: llvm-ar s -cr %t.ar %t1.txt
-RUN: llvm-ar -r -s %t.ar %t2.txt -s
-RUN: llvm-ar -t %t.ar | FileCheck %s
-
-CHECK:      1.txt
-CHECK-NEXT: 2.txt
+# Test the use of dash before key letters.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+
+RUN: rm -f %t.ar
+RUN: llvm-ar s -cr %t.ar %t1.txt
+RUN: llvm-ar -r -s %t.ar %t2.txt -s
+RUN: llvm-ar -t %t.ar | FileCheck %s
+
+CHECK:      1.txt
+CHECK-NEXT: 2.txt
diff --git a/llvm/test/tools/llvm-ar/delete.test b/llvm/test/tools/llvm-ar/delete.test
index d5ab797664173..20e58b9a45b57 100644
--- a/llvm/test/tools/llvm-ar/delete.test
+++ b/llvm/test/tools/llvm-ar/delete.test
@@ -1,67 +1,67 @@
-## Test the deletion of members and that symbols are removed from the symbol table.
-
-# RUN: yaml2obj %s -o %t-delete.o --docnum=1
-# RUN: yaml2obj %s -o %t-keep.o --docnum=2
-# RUN: touch %t1.txt
-# RUN: touch %t2.txt
-
-## Add file:
-# RUN: rm -f %t.a
-# RUN: llvm-ar rc %t.a %t1.txt %t-delete.o %t-keep.o %t2.txt
-# RUN: llvm-nm --print-armap %t.a \
-# RUN:   | FileCheck %s --check-prefix=SYMBOL-ADDED
-# RUN: llvm-ar t %t.a | FileCheck %s --check-prefix=FILE-ADDED
-
-# SYMBOL-ADDED:      symbol1
-# SYMBOL-ADDED-NEXT: symbol2
-
-# FILE-ADDED:      1.txt
-# FILE-ADDED-NEXT: delete.o
-# FILE-ADDED-NEXT: keep.o
-# FILE-ADDED-NEXT: 2.txt
-
-## Delete file that is not a member:
-# RUN: cp %t.a %t-archive-copy.a
-# RUN: llvm-ar d %t.a t/missing.o
-# RUN: cmp %t.a %t-archive-copy.a
-
-## Delete file:
-# RUN: llvm-ar d %t.a %t-delete.o
-# RUN: llvm-nm --print-armap %t.a \
-# RUN:   | FileCheck %s --check-prefix=SYMBOL-DELETED --implicit-check-not symbol1
-# RUN: llvm-ar t %t.a \
-# RUN:   | FileCheck %s --check-prefix=FILE-DELETED --implicit-check-not delete.o
-
-# SYMBOL-DELETED: symbol2
-
-# FILE-DELETED:      1.txt
-# FILE-DELETED-NEXT: keep.o
-# FILE-DELETED-NEXT: 2.txt
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol1
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol2
-    Binding: STB_GLOBAL
-    Section: .text
+## Test the deletion of members and that symbols are removed from the symbol table.
+
+# RUN: yaml2obj %s -o %t-delete.o --docnum=1
+# RUN: yaml2obj %s -o %t-keep.o --docnum=2
+# RUN: touch %t1.txt
+# RUN: touch %t2.txt
+
+## Add file:
+# RUN: rm -f %t.a
+# RUN: llvm-ar rc %t.a %t1.txt %t-delete.o %t-keep.o %t2.txt
+# RUN: llvm-nm --print-armap %t.a \
+# RUN:   | FileCheck %s --check-prefix=SYMBOL-ADDED
+# RUN: llvm-ar t %t.a | FileCheck %s --check-prefix=FILE-ADDED
+
+# SYMBOL-ADDED:      symbol1
+# SYMBOL-ADDED-NEXT: symbol2
+
+# FILE-ADDED:      1.txt
+# FILE-ADDED-NEXT: delete.o
+# FILE-ADDED-NEXT: keep.o
+# FILE-ADDED-NEXT: 2.txt
+
+## Delete file that is not a member:
+# RUN: cp %t.a %t-archive-copy.a
+# RUN: llvm-ar d %t.a t/missing.o
+# RUN: cmp %t.a %t-archive-copy.a
+
+## Delete file:
+# RUN: llvm-ar d %t.a %t-delete.o
+# RUN: llvm-nm --print-armap %t.a \
+# RUN:   | FileCheck %s --check-prefix=SYMBOL-DELETED --implicit-check-not symbol1
+# RUN: llvm-ar t %t.a \
+# RUN:   | FileCheck %s --check-prefix=FILE-DELETED --implicit-check-not delete.o
+
+# SYMBOL-DELETED: symbol2
+
+# FILE-DELETED:      1.txt
+# FILE-DELETED-NEXT: keep.o
+# FILE-DELETED-NEXT: 2.txt
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/insert-after.test b/llvm/test/tools/llvm-ar/insert-after.test
index cd8ee9409c6b4..7c01e736149aa 100644
--- a/llvm/test/tools/llvm-ar/insert-after.test
+++ b/llvm/test/tools/llvm-ar/insert-after.test
@@ -1,59 +1,59 @@
-## Test inserting files after a file.
-
-RUN: touch %t1.txt
-RUN: touch %t2.txt
-RUN: touch %t3.txt
-RUN: touch %t4.txt
-
-# Insert one file:
-RUN: rm -f %t-one.a
-RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt
-RUN: llvm-ar ra %t1.txt %t-one.a %t3.txt
-RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
-
-ONE:      1.txt
-ONE-NEXT: 3.txt
-ONE-NEXT: 2.txt
-
-# Insert file at back:
-RUN: rm -f %t-back.a
-RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt
-RUN: llvm-ar ra %t2.txt %t-back.a %t3.txt
-RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK
-
-BACK:      1.txt
-BACK-NEXT: 2.txt
-BACK-NEXT: 3.txt
-
-# Insert multiple files:
-RUN: rm -f %t-multiple.a
-RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt
-RUN: llvm-ar ra %t1.txt %t-multiple.a %t4.txt %t3.txt
-RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
-
-MULTIPLE:      1.txt
-MULTIPLE-NEXT: 4.txt
-MULTIPLE-NEXT: 3.txt
-MULTIPLE-NEXT: 2.txt
-
-# Insert after invalid file:
-RUN: rm -f %t-invalid.a
-RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
-RUN: not llvm-ar ra invalid.txt %t-invalid.a %t2.txt 2>&1 \
-RUN:   | FileCheck %s --check-prefix=ERROR
-RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
-
-ERROR: error: Insertion point not found.
-INVALID:      1.txt
-INVALID-NEXT: 2.txt
-INVALID-NEXT: 3.txt
-
-# Insert file at the same position:
-RUN: rm -f %t-position.a
-RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar ra %t1.txt %t-position.a %t2.txt
-RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
-
-POSITION:      1.txt
-POSITION-NEXT: 2.txt
-POSITION-NEXT: 3.txt
+## Test inserting files after a file.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+RUN: touch %t3.txt
+RUN: touch %t4.txt
+
+# Insert one file:
+RUN: rm -f %t-one.a
+RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt
+RUN: llvm-ar ra %t1.txt %t-one.a %t3.txt
+RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
+
+ONE:      1.txt
+ONE-NEXT: 3.txt
+ONE-NEXT: 2.txt
+
+# Insert file at back:
+RUN: rm -f %t-back.a
+RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt
+RUN: llvm-ar ra %t2.txt %t-back.a %t3.txt
+RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK
+
+BACK:      1.txt
+BACK-NEXT: 2.txt
+BACK-NEXT: 3.txt
+
+# Insert multiple files:
+RUN: rm -f %t-multiple.a
+RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt
+RUN: llvm-ar ra %t1.txt %t-multiple.a %t4.txt %t3.txt
+RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
+
+MULTIPLE:      1.txt
+MULTIPLE-NEXT: 4.txt
+MULTIPLE-NEXT: 3.txt
+MULTIPLE-NEXT: 2.txt
+
+# Insert after invalid file:
+RUN: rm -f %t-invalid.a
+RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
+RUN: not llvm-ar ra invalid.txt %t-invalid.a %t2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=ERROR
+RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
+
+ERROR: error: Insertion point not found.
+INVALID:      1.txt
+INVALID-NEXT: 2.txt
+INVALID-NEXT: 3.txt
+
+# Insert file at the same position:
+RUN: rm -f %t-position.a
+RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar ra %t1.txt %t-position.a %t2.txt
+RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
+
+POSITION:      1.txt
+POSITION-NEXT: 2.txt
+POSITION-NEXT: 3.txt
diff --git a/llvm/test/tools/llvm-ar/insert-before.test b/llvm/test/tools/llvm-ar/insert-before.test
index 61505d8162aad..4abf3a000287c 100644
--- a/llvm/test/tools/llvm-ar/insert-before.test
+++ b/llvm/test/tools/llvm-ar/insert-before.test
@@ -1,59 +1,59 @@
-## Test inserting files before a file.
-
-RUN: touch %t1.txt
-RUN: touch %t2.txt
-RUN: touch %t3.txt
-RUN: touch %t4.txt
-
-# Insert one file:
-RUN: rm -f %t-one.a
-RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt
-RUN: llvm-ar rb %t2.txt %t-one.a %t3.txt
-RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
-
-ONE:      1.txt
-ONE-NEXT: 3.txt
-ONE-NEXT: 2.txt
-
-# Insert file at front:
-RUN: rm -f %t-front.a
-RUN: llvm-ar rc %t-front.a %t1.txt %t2.txt
-RUN: llvm-ar rb %t1.txt %t-front.a %t3.txt
-RUN: llvm-ar t %t-front.a | FileCheck %s --check-prefix=FRONT
-
-FRONT:      3.txt
-FRONT-NEXT: 1.txt
-FRONT-NEXT: 2.txt
-
-# Insert multiple files:
-RUN: rm -f %t-multiple.a
-RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt
-RUN: llvm-ar rb %t2.txt %t-multiple.a %t4.txt %t3.txt
-RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
-
-MULTIPLE:      1.txt
-MULTIPLE-NEXT: 4.txt
-MULTIPLE-NEXT: 3.txt
-MULTIPLE-NEXT: 2.txt
-
-# Insert before an invalid file:
-RUN: rm -f %t-invalid.a
-RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
-RUN: not llvm-ar rb invalid.txt %t-invalid.a %t2.txt 2>&1 \
-RUN:   | FileCheck %s --check-prefix=ERROR
-RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
-
-ERROR: error: Insertion point not found.
-INVALID:      1.txt
-INVALID-NEXT: 2.txt
-INVALID-NEXT: 3.txt
-
-# Insert file at the same position:
-RUN: rm -f %t-position.a
-RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar rb %t3.txt %t-position.a %t2.txt
-RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
-
-POSITION:      1.txt
-POSITION-NEXT: 2.txt
-POSITION-NEXT: 3.txt
+## Test inserting files before a file.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+RUN: touch %t3.txt
+RUN: touch %t4.txt
+
+# Insert one file:
+RUN: rm -f %t-one.a
+RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt
+RUN: llvm-ar rb %t2.txt %t-one.a %t3.txt
+RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
+
+ONE:      1.txt
+ONE-NEXT: 3.txt
+ONE-NEXT: 2.txt
+
+# Insert file at front:
+RUN: rm -f %t-front.a
+RUN: llvm-ar rc %t-front.a %t1.txt %t2.txt
+RUN: llvm-ar rb %t1.txt %t-front.a %t3.txt
+RUN: llvm-ar t %t-front.a | FileCheck %s --check-prefix=FRONT
+
+FRONT:      3.txt
+FRONT-NEXT: 1.txt
+FRONT-NEXT: 2.txt
+
+# Insert multiple files:
+RUN: rm -f %t-multiple.a
+RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt
+RUN: llvm-ar rb %t2.txt %t-multiple.a %t4.txt %t3.txt
+RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
+
+MULTIPLE:      1.txt
+MULTIPLE-NEXT: 4.txt
+MULTIPLE-NEXT: 3.txt
+MULTIPLE-NEXT: 2.txt
+
+# Insert before an invalid file:
+RUN: rm -f %t-invalid.a
+RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
+RUN: not llvm-ar rb invalid.txt %t-invalid.a %t2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=ERROR
+RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
+
+ERROR: error: Insertion point not found.
+INVALID:      1.txt
+INVALID-NEXT: 2.txt
+INVALID-NEXT: 3.txt
+
+# Insert file at the same position:
+RUN: rm -f %t-position.a
+RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar rb %t3.txt %t-position.a %t2.txt
+RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
+
+POSITION:      1.txt
+POSITION-NEXT: 2.txt
+POSITION-NEXT: 3.txt
diff --git a/llvm/test/tools/llvm-ar/move-after.test b/llvm/test/tools/llvm-ar/move-after.test
index fb61f2aa118f6..6a4360c3cd0a4 100644
--- a/llvm/test/tools/llvm-ar/move-after.test
+++ b/llvm/test/tools/llvm-ar/move-after.test
@@ -1,59 +1,59 @@
-## Test moving files after a file.
-
-RUN: touch %t1.txt
-RUN: touch %t2.txt
-RUN: touch %t3.txt
-RUN: touch %t4.txt
-
-# Move one file:
-RUN: rm -f %t-one.ar
-RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar ma %t1.txt %t-one.a %t3.txt
-RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
-
-ONE:      1.txt
-ONE-NEXT: 3.txt
-ONE-NEXT: 2.txt
-
-# Move file to back:
-RUN: rm -f %t-back.ar
-RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar ma %t2.txt %t-back.a %t1.txt
-RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK
-
-BACK:      2.txt
-BACK-NEXT: 1.txt
-BACK-NEXT: 3.txt
-
-# Move multiple files:
-RUN: rm -f %t-multiple.ar
-RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt
-RUN: llvm-ar ma %t1.txt %t-multiple.a  %t4.txt %t3.txt
-RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
-
-MULTIPLE:      1.txt
-MULTIPLE-NEXT: 3.txt
-MULTIPLE-NEXT: 4.txt
-MULTIPLE-NEXT: 2.txt
-
-# Move after invalid file:
-RUN: rm -f %t-invalid.ar
-RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
-RUN: not llvm-ar ma invalid.txt %t-invalid.a %t2.txt 2>&1 \
-RUN:   | FileCheck %s --check-prefix=ERROR
-RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
-
-ERROR: error: Insertion point not found.
-INVALID:      1.txt
-INVALID-NEXT: 2.txt
-INVALID-NEXT: 3.txt
-
-# Move file to the same position:
-RUN: rm -f %t-position.ar
-RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar ma %t1.txt %t-position.a %t2.txt
-RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
-
-POSITION:      1.txt
-POSITION-NEXT: 2.txt
-POSITION-NEXT: 3.txt
+## Test moving files after a file.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+RUN: touch %t3.txt
+RUN: touch %t4.txt
+
+# Move one file:
+RUN: rm -f %t-one.ar
+RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar ma %t1.txt %t-one.a %t3.txt
+RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
+
+ONE:      1.txt
+ONE-NEXT: 3.txt
+ONE-NEXT: 2.txt
+
+# Move file to back:
+RUN: rm -f %t-back.ar
+RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar ma %t2.txt %t-back.a %t1.txt
+RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK
+
+BACK:      2.txt
+BACK-NEXT: 1.txt
+BACK-NEXT: 3.txt
+
+# Move multiple files:
+RUN: rm -f %t-multiple.ar
+RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt
+RUN: llvm-ar ma %t1.txt %t-multiple.a  %t4.txt %t3.txt
+RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
+
+MULTIPLE:      1.txt
+MULTIPLE-NEXT: 3.txt
+MULTIPLE-NEXT: 4.txt
+MULTIPLE-NEXT: 2.txt
+
+# Move after invalid file:
+RUN: rm -f %t-invalid.ar
+RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
+RUN: not llvm-ar ma invalid.txt %t-invalid.a %t2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=ERROR
+RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
+
+ERROR: error: Insertion point not found.
+INVALID:      1.txt
+INVALID-NEXT: 2.txt
+INVALID-NEXT: 3.txt
+
+# Move file to the same position:
+RUN: rm -f %t-position.ar
+RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar ma %t1.txt %t-position.a %t2.txt
+RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
+
+POSITION:      1.txt
+POSITION-NEXT: 2.txt
+POSITION-NEXT: 3.txt
diff --git a/llvm/test/tools/llvm-ar/move-before.test b/llvm/test/tools/llvm-ar/move-before.test
index b5fefec29557a..306a887e26b89 100644
--- a/llvm/test/tools/llvm-ar/move-before.test
+++ b/llvm/test/tools/llvm-ar/move-before.test
@@ -1,69 +1,69 @@
-## Test moving files after a file.
-
-RUN: touch %t1.txt
-RUN: touch %t2.txt
-RUN: touch %t3.txt
-RUN: touch %t4.txt
-
-# Move one file:
-RUN: rm -f %t-one.ar
-RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar mb %t2.txt %t-one.a %t3.txt
-RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
-
-ONE:      1.txt
-ONE-NEXT: 3.txt
-ONE-NEXT: 2.txt
-
-# Move file to front:
-RUN: rm -f %t-front.ar
-RUN: llvm-ar rc %t-front.ar %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar mb %t1.txt %t-front.ar %t3.txt
-RUN: llvm-ar t %t-front.ar | FileCheck %s --check-prefix=FRONT
-
-FRONT:      3.txt
-FRONT-NEXT: 1.txt
-FRONT-NEXT: 2.txt
-
-# Move multiple files:
-RUN: rm -f %t-multiple.ar
-RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt
-RUN: llvm-ar mb %t2.txt %t-multiple.a %t4.txt %t3.txt
-RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
-
-MULTIPLE:      1.txt
-MULTIPLE-NEXT: 3.txt
-MULTIPLE-NEXT: 4.txt
-MULTIPLE-NEXT: 2.txt
-
-# Move before invalid file:
-RUN: rm -f %t-invalid.ar
-RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
-RUN: not llvm-ar mb invalid.txt %t-invalid.a %t2.txt 2>&1 \
-RUN:   | FileCheck %s --check-prefix=ERROR
-RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
-
-ERROR: error: Insertion point not found.
-INVALID:      1.txt
-INVALID-NEXT: 2.txt
-INVALID-NEXT: 3.txt
-
-# Move file to the same position:
-RUN: rm -f %t-position.ar
-RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar mb %t3.txt %t-position.a %t2.txt
-RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
-
-POSITION:      1.txt
-POSITION-NEXT: 2.txt
-POSITION-NEXT: 3.txt
-
-# Move file after itself:
-RUN: rm -f %t-same.ar
-RUN: llvm-ar rc %t-same.ar %t1.txt %t2.txt %t3.txt
-RUN: llvm-ar mb %t2.txt %t-same.ar %t2.txt
-RUN: llvm-ar t %t-same.ar | FileCheck %s --check-prefix=SAME
-
-SAME:      1.txt
-SAME-NEXT: 2.txt
-SAME-NEXT: 3.txt
+## Test moving files before a file.
+
+RUN: touch %t1.txt
+RUN: touch %t2.txt
+RUN: touch %t3.txt
+RUN: touch %t4.txt
+
+# Move one file:
+RUN: rm -f %t-one.ar
+RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar mb %t2.txt %t-one.a %t3.txt
+RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE
+
+ONE:      1.txt
+ONE-NEXT: 3.txt
+ONE-NEXT: 2.txt
+
+# Move file to front:
+RUN: rm -f %t-front.ar
+RUN: llvm-ar rc %t-front.ar %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar mb %t1.txt %t-front.ar %t3.txt
+RUN: llvm-ar t %t-front.ar | FileCheck %s --check-prefix=FRONT
+
+FRONT:      3.txt
+FRONT-NEXT: 1.txt
+FRONT-NEXT: 2.txt
+
+# Move multiple files:
+RUN: rm -f %t-multiple.ar
+RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt
+RUN: llvm-ar mb %t2.txt %t-multiple.a %t4.txt %t3.txt
+RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE
+
+MULTIPLE:      1.txt
+MULTIPLE-NEXT: 3.txt
+MULTIPLE-NEXT: 4.txt
+MULTIPLE-NEXT: 2.txt
+
+# Move before invalid file:
+RUN: rm -f %t-invalid.ar
+RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt
+RUN: not llvm-ar mb invalid.txt %t-invalid.a %t2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=ERROR
+RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID
+
+ERROR: error: Insertion point not found.
+INVALID:      1.txt
+INVALID-NEXT: 2.txt
+INVALID-NEXT: 3.txt
+
+# Move file to the same position:
+RUN: rm -f %t-position.ar
+RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar mb %t3.txt %t-position.a %t2.txt
+RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION
+
+POSITION:      1.txt
+POSITION-NEXT: 2.txt
+POSITION-NEXT: 3.txt
+
+# Move file before itself:
+RUN: rm -f %t-same.ar
+RUN: llvm-ar rc %t-same.ar %t1.txt %t2.txt %t3.txt
+RUN: llvm-ar mb %t2.txt %t-same.ar %t2.txt
+RUN: llvm-ar t %t-same.ar | FileCheck %s --check-prefix=SAME
+
+SAME:      1.txt
+SAME-NEXT: 2.txt
+SAME-NEXT: 3.txt
diff --git a/llvm/test/tools/llvm-ar/move.test b/llvm/test/tools/llvm-ar/move.test
index c5028f52e405c..268fbf94cf19f 100644
--- a/llvm/test/tools/llvm-ar/move.test
+++ b/llvm/test/tools/llvm-ar/move.test
@@ -1,129 +1,129 @@
-## Test the move command without modifiers moves members to the end
-
-# RUN: rm -rf %t && mkdir -p %t
-# RUN: yaml2obj %s -o %t/1.o --docnum=1
-# RUN: yaml2obj %s -o %t/2.o --docnum=2
-# RUN: yaml2obj %s -o %t/3.o --docnum=3
-
-## Move single member:
-# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar m %t/single.a %t/1.o
-# RUN: llvm-ar t %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}}
-
-# SINGLE:      2.o
-# SINGLE-NEXT: 3.o
-# SINGLE-NEXT: 1.o
-
-# RUN: llvm-nm --print-armap %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
-
-# SINGLE-SYM:      symbol2
-# SINGLE-SYM-NEXT: symbol3
-# SINGLE-SYM-NEXT: symbol1
-
-## Move multiple members:
-# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar m %t/multiple.a %t/1.o %t/2.o
-# RUN: llvm-ar t %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}}
-
-# MULTIPLE:      3.o
-# MULTIPLE-NEXT: 1.o
-# MULTIPLE-NEXT: 2.o
-
-# RUN: llvm-nm --print-armap %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
-
-# MULTIPLE-SYM:      symbol3
-# MULTIPLE-SYM-NEXT: symbol1
-# MULTIPLE-SYM-NEXT: symbol2
-
-## Move same member:
-# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar m %t/same.a %t/1.o %t/1.o
-# RUN: llvm-ar t %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o
-
-# SAME:      2.o
-# SAME-NEXT: 3.o
-# SAME-NEXT: 1.o
-
-# RUN: llvm-nm --print-armap %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME-SYM
-
-# SAME-SYM:      symbol2
-# SAME-SYM-NEXT: symbol3
-# SAME-SYM-NEXT: symbol1
-
-## Move without member:
-# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar m %t/without.a
-# RUN: llvm-ar t %t/without.a \
-# RUN:   | FileCheck %s --match-full-lines --check-prefix=WITHOUT --implicit-check-not {{.}}
-
-# WITHOUT:      1.o
-# WITHOUT-NEXT: 2.o
-# WITHOUT-NEXT: 3.o
-
-# RUN: llvm-nm --print-armap %t/without.a \
-# RUN:   | FileCheck %s --check-prefix=WITHOUT-SYM
-
-# WITHOUT-SYM:      symbol1
-# WITHOUT-SYM-NEXT: symbol2
-# WITHOUT-SYM-NEXT: symbol3
-
-## No archive:
-# RUN: not llvm-ar m 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
-
-# NO-ARCHIVE: error: An archive name must be specified.
-
-## Member does not exist:
-# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o
-# RUN: not llvm-ar m %t/missing.a %t/missing.txt 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
-
-# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory.
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol1
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol2
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol3
-    Binding: STB_GLOBAL
-    Section: .text
+## Test that the move command without modifiers moves members to the end.
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: yaml2obj %s -o %t/1.o --docnum=1
+# RUN: yaml2obj %s -o %t/2.o --docnum=2
+# RUN: yaml2obj %s -o %t/3.o --docnum=3
+
+## Move single member:
+# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar m %t/single.a %t/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}}
+
+# SINGLE:      2.o
+# SINGLE-NEXT: 3.o
+# SINGLE-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+# SINGLE-SYM:      symbol2
+# SINGLE-SYM-NEXT: symbol3
+# SINGLE-SYM-NEXT: symbol1
+
+## Move multiple members:
+# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar m %t/multiple.a %t/1.o %t/2.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}}
+
+# MULTIPLE:      3.o
+# MULTIPLE-NEXT: 1.o
+# MULTIPLE-NEXT: 2.o
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+# MULTIPLE-SYM:      symbol3
+# MULTIPLE-SYM-NEXT: symbol1
+# MULTIPLE-SYM-NEXT: symbol2
+
+## Move same member:
+# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar m %t/same.a %t/1.o %t/1.o
+# RUN: llvm-ar t %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o
+
+# SAME:      2.o
+# SAME-NEXT: 3.o
+# SAME-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+# SAME-SYM:      symbol2
+# SAME-SYM-NEXT: symbol3
+# SAME-SYM-NEXT: symbol1
+
+## Move without member:
+# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar m %t/without.a
+# RUN: llvm-ar t %t/without.a \
+# RUN:   | FileCheck %s --match-full-lines --check-prefix=WITHOUT --implicit-check-not {{.}}
+
+# WITHOUT:      1.o
+# WITHOUT-NEXT: 2.o
+# WITHOUT-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/without.a \
+# RUN:   | FileCheck %s --check-prefix=WITHOUT-SYM
+
+# WITHOUT-SYM:      symbol1
+# WITHOUT-SYM-NEXT: symbol2
+# WITHOUT-SYM-NEXT: symbol3
+
+## No archive:
+# RUN: not llvm-ar m 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
+
+# NO-ARCHIVE: error: An archive name must be specified.
+
+## Member does not exist:
+# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o
+# RUN: not llvm-ar m %t/missing.a %t/missing.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
+
+# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory.
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol3
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/mri-addlib.test b/llvm/test/tools/llvm-ar/mri-addlib.test
index c297653e2abb1..cf62acbf0b063 100644
--- a/llvm/test/tools/llvm-ar/mri-addlib.test
+++ b/llvm/test/tools/llvm-ar/mri-addlib.test
@@ -1,45 +1,45 @@
-## Test the ADDLIB MRI command.
-
-# RUN: rm -rf %t && mkdir -p %t
-# RUN: yaml2obj %s -o %t/f.o
-# RUN: llvm-ar r %t/f.a %t/f.o
-
-## Merge contents of archives.
-# RUN: echo "CREATE %t/addlib.a" > %t/addlib.mri
-# RUN: echo "ADDLIB %t/f.a" >> %t/addlib.mri
-# RUN: echo "SAVE" >> %t/addlib.mri
-# RUN: llvm-ar -M < %t/addlib.mri
-# RUN: llvm-nm --print-armap %t/addlib.a | FileCheck --check-prefix=SYMS %s
-# RUN: llvm-ar t %t/addlib.a | FileCheck --check-prefix=FILES %s
-
-# SYMS: f in {{.*}}
-# FILES: f.o
-
-## ADDLIB with non-archive file.
-# RUN: echo "CREATE %t/badlib.a" > %t/badlib.mri
-# RUN: echo "ADDLIB %s" >> %t/badlib.mri
-# RUN: echo "SAVE" >> %t/badlib.mri
-# RUN: not llvm-ar -M < %t/badlib.mri 2>&1 | FileCheck --check-prefix=PARSE %s
-# RUN: not ls %t/badlib.a
-
-# PARSE: Could not parse library
-
-## No create command.
-# RUN: echo "ADDLIB %t/f.a"  > %t/nocreate.mri
-# RUN: echo "SAVE" >> %t/nocreate.mri
-# RUN: not llvm-ar -M < %t/nocreate.mri
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-    - Name:    f
-      Binding: STB_GLOBAL
-      Section: .text
-...
+## Test the ADDLIB MRI command.
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: yaml2obj %s -o %t/f.o
+# RUN: llvm-ar r %t/f.a %t/f.o
+
+## Merge contents of archives.
+# RUN: echo "CREATE %t/addlib.a" > %t/addlib.mri
+# RUN: echo "ADDLIB %t/f.a" >> %t/addlib.mri
+# RUN: echo "SAVE" >> %t/addlib.mri
+# RUN: llvm-ar -M < %t/addlib.mri
+# RUN: llvm-nm --print-armap %t/addlib.a | FileCheck --check-prefix=SYMS %s
+# RUN: llvm-ar t %t/addlib.a | FileCheck --check-prefix=FILES %s
+
+# SYMS: f in {{.*}}
+# FILES: f.o
+
+## ADDLIB with non-archive file.
+# RUN: echo "CREATE %t/badlib.a" > %t/badlib.mri
+# RUN: echo "ADDLIB %s" >> %t/badlib.mri
+# RUN: echo "SAVE" >> %t/badlib.mri
+# RUN: not llvm-ar -M < %t/badlib.mri 2>&1 | FileCheck --check-prefix=PARSE %s
+# RUN: not ls %t/badlib.a
+
+# PARSE: Could not parse library
+
+## No create command.
+# RUN: echo "ADDLIB %t/f.a"  > %t/nocreate.mri
+# RUN: echo "SAVE" >> %t/nocreate.mri
+# RUN: not llvm-ar -M < %t/nocreate.mri
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+    - Name:    f
+      Binding: STB_GLOBAL
+      Section: .text
+...
diff --git a/llvm/test/tools/llvm-ar/mri-addmod.test b/llvm/test/tools/llvm-ar/mri-addmod.test
index 2b6e4dc3adbee..c26414fad0b8c 100644
--- a/llvm/test/tools/llvm-ar/mri-addmod.test
+++ b/llvm/test/tools/llvm-ar/mri-addmod.test
@@ -1,27 +1,27 @@
-## Test the ADDMOD MRI command.
-
-# RUN: rm -rf %t && mkdir -p %t
-# RUN: yaml2obj %s -o %t/f.o
-
-# RUN: echo "CREATE %t/addmod.a" > %t/addmod.mri
-# RUN: echo "ADDMOD %t/f.o" >> %t/addmod.mri
-# RUN: echo "SAVE" >> %t/addmod.mri
-# RUN: llvm-ar -M < %t/addmod.mri
-# RUN: llvm-nm --print-armap %t/addmod.a | FileCheck %s
-
-# CHECK: f in f.o
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-    - Name:    f
-      Binding: STB_GLOBAL
-      Section: .text
-...
+## Test the ADDMOD MRI command.
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: yaml2obj %s -o %t/f.o
+
+# RUN: echo "CREATE %t/addmod.a" > %t/addmod.mri
+# RUN: echo "ADDMOD %t/f.o" >> %t/addmod.mri
+# RUN: echo "SAVE" >> %t/addmod.mri
+# RUN: llvm-ar -M < %t/addmod.mri
+# RUN: llvm-nm --print-armap %t/addmod.a | FileCheck %s
+
+# CHECK: f in f.o
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+    - Name:    f
+      Binding: STB_GLOBAL
+      Section: .text
+...
diff --git a/llvm/test/tools/llvm-ar/mri-comments.test b/llvm/test/tools/llvm-ar/mri-comments.test
index f5fa61768aeb0..aac62dc00e699 100644
--- a/llvm/test/tools/llvm-ar/mri-comments.test
+++ b/llvm/test/tools/llvm-ar/mri-comments.test
@@ -1,19 +1,19 @@
-# Test different MRI comment formats and white space.
-
-RUN: rm -rf %t && mkdir -p %t
-RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
-
-RUN: echo "create %t/mri.ar;comment" > %t/script.mri
-RUN: echo "addmod %t/elf.o * comment" >> %t/script.mri
-RUN: echo "; comment" >> %t/script.mri
-RUN: echo " ;comment" >> %t/script.mri
-RUN: echo "* comment" >> %t/script.mri
-RUN: echo " *comment" >> %t/script.mri
-RUN: echo "" >> %t/script.mri
-RUN: echo " " >> %t/script.mri
-RUN: echo " save" >> %t/script.mri
-
-RUN: llvm-ar -M < %t/script.mri
-RUN: llvm-ar t %t/mri.ar | FileCheck %s
-
-CHECK: elf.o
+# Test different MRI comment formats and white space.
+
+RUN: rm -rf %t && mkdir -p %t
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
+
+RUN: echo "create %t/mri.ar;comment" > %t/script.mri
+RUN: echo "addmod %t/elf.o * comment" >> %t/script.mri
+RUN: echo "; comment" >> %t/script.mri
+RUN: echo " ;comment" >> %t/script.mri
+RUN: echo "* comment" >> %t/script.mri
+RUN: echo " *comment" >> %t/script.mri
+RUN: echo "" >> %t/script.mri
+RUN: echo " " >> %t/script.mri
+RUN: echo " save" >> %t/script.mri
+
+RUN: llvm-ar -M < %t/script.mri
+RUN: llvm-ar t %t/mri.ar | FileCheck %s
+
+CHECK: elf.o
diff --git a/llvm/test/tools/llvm-ar/mri-end.test b/llvm/test/tools/llvm-ar/mri-end.test
index db4abce7d180e..a00f2f53aee26 100644
--- a/llvm/test/tools/llvm-ar/mri-end.test
+++ b/llvm/test/tools/llvm-ar/mri-end.test
@@ -1,48 +1,48 @@
-# The END MRI command is optional. Scripts that omit
-# or include END should be handled by llvm-ar.
-RUN: rm -rf %t && mkdir -p %t
-
-# Empty File
-RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
-
-RUN: touch %t/empty.mri
-RUN: llvm-ar -M < %t/empty.mri
-
-RUN: echo "END" > %t/empty-end.mri
-RUN: llvm-ar -M < %t/empty-end.mri
-
-# Comment only
-RUN: echo "; a comment" > %t/comment.mri
-RUN: llvm-ar -M < %t/comment.mri
-
-RUN: echo "; a comment" > %t/comment-end.mri
-RUN: echo "END" > %t/comment-end.mri
-RUN: llvm-ar -M < %t/comment-end.mri
-
-# Without Save
-RUN: echo "create %t/mri.ar" > %t/no-save.mri
-RUN: echo "addmod %t/elf.o" >> %t/no-save.mri
-RUN: llvm-ar -M < %t/no-save.mri
-RUN: test ! -e %t/mri.ar
-
-RUN: echo "create %t/mri.ar" > %t/no-save-end.mri
-RUN: echo "addmod %t/elf.o" >> %t/no-save-end.mri
-RUN: echo "END" > %t/no-save-end.mri
-RUN: llvm-ar -M < %t/no-save-end.mri
-RUN: test ! -e %t/mri.ar
-
-# With Save
-RUN: echo "create %t/mri.ar" > %t/save.mri
-RUN: echo "addmod %t/elf.o" >> %t/save.mri
-RUN: echo "save" >> %t/save.mri
-RUN: llvm-ar -M < %t/save.mri
-RUN: llvm-ar t %t/mri.ar | FileCheck %s
-
-RUN: echo "create %t/mri.ar" > %t/save-end.mri
-RUN: echo "addmod %t/elf.o" >> %t/save-end.mri
-RUN: echo "save" >> %t/save-end.mri
-RUN: echo "END" > %t/no-save-end.mri
-RUN: llvm-ar -M < %t/save-end.mri
-RUN: llvm-ar t %t/mri.ar | FileCheck %s
-
-CHECK: elf.o
+# The END MRI command is optional. Scripts that omit
+# or include END should be handled by llvm-ar.
+RUN: rm -rf %t && mkdir -p %t
+
+# Empty File
+RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o
+
+RUN: touch %t/empty.mri
+RUN: llvm-ar -M < %t/empty.mri
+
+RUN: echo "END" > %t/empty-end.mri
+RUN: llvm-ar -M < %t/empty-end.mri
+
+# Comment only
+RUN: echo "; a comment" > %t/comment.mri
+RUN: llvm-ar -M < %t/comment.mri
+
+RUN: echo "; a comment" > %t/comment-end.mri
+RUN: echo "END" > %t/comment-end.mri
+RUN: llvm-ar -M < %t/comment-end.mri
+
+# Without Save
+RUN: echo "create %t/mri.ar" > %t/no-save.mri
+RUN: echo "addmod %t/elf.o" >> %t/no-save.mri
+RUN: llvm-ar -M < %t/no-save.mri
+RUN: test ! -e %t/mri.ar
+
+RUN: echo "create %t/mri.ar" > %t/no-save-end.mri
+RUN: echo "addmod %t/elf.o" >> %t/no-save-end.mri
+RUN: echo "END" > %t/no-save-end.mri
+RUN: llvm-ar -M < %t/no-save-end.mri
+RUN: test ! -e %t/mri.ar
+
+# With Save
+RUN: echo "create %t/mri.ar" > %t/save.mri
+RUN: echo "addmod %t/elf.o" >> %t/save.mri
+RUN: echo "save" >> %t/save.mri
+RUN: llvm-ar -M < %t/save.mri
+RUN: llvm-ar t %t/mri.ar | FileCheck %s
+
+RUN: echo "create %t/mri.ar" > %t/save-end.mri
+RUN: echo "addmod %t/elf.o" >> %t/save-end.mri
+RUN: echo "save" >> %t/save-end.mri
+RUN: echo "END" > %t/no-save-end.mri
+RUN: llvm-ar -M < %t/save-end.mri
+RUN: llvm-ar t %t/mri.ar | FileCheck %s
+
+CHECK: elf.o
diff --git a/llvm/test/tools/llvm-ar/quick-append.test b/llvm/test/tools/llvm-ar/quick-append.test
index ef2e5f74cc808..607ba413ae09d 100644
--- a/llvm/test/tools/llvm-ar/quick-append.test
+++ b/llvm/test/tools/llvm-ar/quick-append.test
@@ -1,93 +1,91 @@
-## Test quick append 
-
-# RUN: rm -rf %t && mkdir -p %t
-# RUN: yaml2obj %s -o %t/1.o --docnum=1
-# RUN: yaml2obj %s -o %t/2.o --docnum=2
-
-## Append single member:
-# RUN: llvm-ar qc %t/single.a %t/1.o
-# RUN: llvm-ar t %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}}
-
-# SINGLE: 1.o
-
-# RUN: llvm-nm --print-armap %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
-
-# SINGLE-SYM: symbol1
-
-## Append multiple members:
-# RUN: llvm-ar qc %t/multiple.a %t/1.o %t/2.o
-# RUN: llvm-ar t %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}}
-
-# MULTIPLE:      1.o
-# MULTIPLE-NEXT: 2.o
-
-# RUN: llvm-nm --print-armap %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
-
-# MULTIPLE-SYM:      symbol1
-# MULTIPLE-SYM-NEXT: symbol2
-
-## Append same member:
-# RUN: llvm-ar qc %t/same.a %t/1.o %t/1.o
-# RUN: llvm-ar t %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --match-full-lines --implicit-check-not {{.}}
-
-# SAME:      1.o
-# SAME-NEXT: 1.o
-
-# RUN: llvm-nm --print-armap %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME-SYM
-
-# SAME-SYM:      symbol1
-# SAME-SYM-NEXT: symbol1
-
-## Append without member:
-# RUN: llvm-ar qc %t/without.a
-# RUN: llvm-ar t %t/without.a \
-# RUN:   | FileCheck /dev/null --allow-empty --implicit-check-not={{.}}
-
-# RUN: llvm-nm --print-armap %t/without.a \
-# RUN:   | FileCheck /dev/null --allow-empty --implicit-check-not={{.}}
-
-## No archive:
-# RUN: not llvm-ar qc 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
-
-# NO-ARCHIVE: error: An archive name must be specified.
-
-## Member does not exist:
-# RUN: not llvm-ar qc %t/missing.a %t/missing.txt 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
-
-# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory.
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol1
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol2
-    Binding: STB_GLOBAL
-    Section: .text
+## Test quick append 
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: yaml2obj %s -o %t/1.o --docnum=1
+# RUN: yaml2obj %s -o %t/2.o --docnum=2
+
+## Append single member:
+# RUN: llvm-ar qc %t/single.a %t/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}}
+
+# SINGLE: 1.o
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+# SINGLE-SYM: symbol1
+
+## Append multiple members:
+# RUN: llvm-ar qc %t/multiple.a %t/1.o %t/2.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}}
+
+# MULTIPLE:      1.o
+# MULTIPLE-NEXT: 2.o
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+# MULTIPLE-SYM:      symbol1
+# MULTIPLE-SYM-NEXT: symbol2
+
+## Append same member:
+# RUN: llvm-ar qc %t/same.a %t/1.o %t/1.o
+# RUN: llvm-ar t %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --match-full-lines --implicit-check-not {{.}}
+
+# SAME:      1.o
+# SAME-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+# SAME-SYM:      symbol1
+# SAME-SYM-NEXT: symbol1
+
+## Append without member:
+# RUN: llvm-ar qc %t/without.a
+# RUN: llvm-ar t %t/without.a | count 0
+
+# RUN: llvm-nm --print-armap %t/without.a | count 0
+
+## No archive:
+# RUN: not llvm-ar qc 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
+
+# NO-ARCHIVE: error: An archive name must be specified.
+
+## Member does not exist:
+# RUN: not llvm-ar qc %t/missing.a %t/missing.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
+
+# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory.
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/read-only-archive.test b/llvm/test/tools/llvm-ar/read-only-archive.test
index f4adea20218c0..1d638f45cac36 100644
--- a/llvm/test/tools/llvm-ar/read-only-archive.test
+++ b/llvm/test/tools/llvm-ar/read-only-archive.test
@@ -1,30 +1,30 @@
-## Test that read-only archives cannot be edited
-
-# RUN: rm -rf %t && mkdir -p %t
-# RUN: touch %t/1.txt
-# RUN: touch %t/2.txt
-# RUN: touch %t/3.txt
-
-# RUN: llvm-ar rc %t/archive.a %t/1.txt %t/2.txt
-
-## Make read only:
-# RUN: chmod 444 %t/archive.a
-
-# RUN: not llvm-ar r %t/archive.a %t/3.txt \
-# RUN:   FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a
-
-# ERROR: error: [[FILE]]: no such file or directory.
-
-# RUN: not llvm-ar q %t/archive.a %t/3.txt \
-# RUN:   FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a
-
-# RUN: not llvm-ar m %t/archive.a t/1.txt \
-# RUN:   FileCheck %s --check-prefix=ERROR-2 -DFILE=%t/archive.a
-
-# ERROR-2: error: [[FILE]]: permission denied.
-
-# RUN: llvm-ar t %t/archive.a \
-# RUN:   | FileCheck %s --check-prefix=ARCHIVE --match-full-lines --implicit-check-not {{.}}
-
-# ARCHIVE: 1.txt
-# ARCHIVE-NEXT: 2.txt
+## Test that read-only archives cannot be edited
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: touch %t/1.txt
+# RUN: touch %t/2.txt
+# RUN: touch %t/3.txt
+
+# RUN: llvm-ar rc %t/archive.a %t/1.txt %t/2.txt
+
+## Make read only:
+# RUN: chmod 444 %t/archive.a
+
+# RUN: not llvm-ar r %t/archive.a %t/3.txt \
+# RUN:   FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a
+
+# ERROR: error: [[FILE]]: no such file or directory.
+
+# RUN: not llvm-ar q %t/archive.a %t/3.txt \
+# RUN:   FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a
+
+# RUN: not llvm-ar m %t/archive.a t/1.txt \
+# RUN:   FileCheck %s --check-prefix=ERROR-2 -DFILE=%t/archive.a
+
+# ERROR-2: error: [[FILE]]: permission denied.
+
+# RUN: llvm-ar t %t/archive.a \
+# RUN:   | FileCheck %s --check-prefix=ARCHIVE --match-full-lines --implicit-check-not {{.}}
+
+# ARCHIVE: 1.txt
+# ARCHIVE-NEXT: 2.txt
diff --git a/llvm/test/tools/llvm-ar/replace-update.test b/llvm/test/tools/llvm-ar/replace-update.test
index 1b51dba1bca00..c056565f144c5 100644
--- a/llvm/test/tools/llvm-ar/replace-update.test
+++ b/llvm/test/tools/llvm-ar/replace-update.test
@@ -1,189 +1,189 @@
-## Test that the replace command with "u" updates the relevant members.
-
-# RUN: rm -rf %t && mkdir -p %t/new/other
-
-# RUN: yaml2obj %s -o %t/1.o --docnum=1
-# RUN: yaml2obj %s -o %t/2.o --docnum=2
-# RUN: yaml2obj %s -o %t/3.o --docnum=3
-
-# RUN: env TZ=GMT touch -t 200001020304 %t/1.o
-# RUN: env TZ=GMT touch -t 200001020304 %t/2.o
-# RUN: env TZ=GMT touch -t 200001020304 %t/3.o
-
-# RUN: yaml2obj %s -o %t/new/1.o --docnum=4
-# RUN: yaml2obj %s -o %t/new/3.o --docnum=5
-
-# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6
-
-# RUN: env TZ=GMT touch -t 200001020304 %t/new/other/1.o
-
-## Replace single member with newer file:
-# RUN: llvm-ar rcU %t/single.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar ruU %t/single.a %t/new/1.o
-# RUN: llvm-ar t %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
-
-# SINGLE:      1.o
-# SINGLE-NEXT: 2.o
-# SINGLE-NEXT: 3.o
-
-# RUN: llvm-nm --print-armap %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
-
-# SINGLE-SYM:      symbolnew1
-# SINGLE-SYM-NEXT: symbol2
-# SINGLE-SYM-NEXT: symbol3
-
-## Replace new single member with older file:
-# RUN: llvm-ar ruU %t/single.a %t/1.o
-# RUN: llvm-ar t %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
-
-# RUN: llvm-nm --print-armap %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
-
-## Replace multiple members with newer files:
-# RUN: llvm-ar rcuU %t/multiple.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar ruU %t/multiple.a %t/new/1.o %t/new/3.o
-# RUN: llvm-ar t %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
-
-# MULTIPLE:      1.o
-# MULTIPLE-NEXT: 2.o
-# MULTIPLE-NEXT: 3.o
-
-# RUN: llvm-nm --print-armap %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
-
-# MULTIPLE-SYM:      symbolnew1
-# MULTIPLE-SYM-NEXT: symbol2
-# MULTIPLE-SYM-NEXTs: symbolnew3
-
-## Replace newer members with multiple older files:
-# RUN: llvm-ar ruU %t/multiple.a %t/1.o %t/2.o
-# RUN: llvm-ar t %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
-
-# RUN: llvm-nm --print-armap %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
-
-## Replace same member with newer files:
-# RUN: llvm-ar rcuU %t/same.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar ruU %t/same.a %t/new/1.o %t/new/other/1.o
-# RUN: llvm-ar t %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}}
-
-# SAME:      1.o
-# SAME-NEXT: 2.o
-# SAME-NEXT: 3.o
-# SAME-NEXT: 1.o
-
-# RUN: llvm-nm --print-armap %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME-SYM
-
-# SAME-SYM:      symbolnew1
-# SAME-SYM-NEXT: symbol2
-# SAME-SYM-NEXT: symbol3
-# SAME-SYM-NEXT: symbolother1
-
-## Replace multiple members with an older file and a newer file:
-# RUN: llvm-ar rcuU %t/old-new.a %t/new/1.o %t/2.o %t/3.o
-# RUN: llvm-ar ruU %t/old-new.a %t/1.o %t/new/3.o
-# RUN: llvm-ar t %t/old-new.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
-
-# RUN: llvm-nm --print-armap %t/old-new.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
-
-## Replace same member with an older file and a newer file:
-# RUN: llvm-ar rcuU %t/old-new-same.a %t/new/1.o %t/2.o %t/3.o
-# RUN: llvm-ar ruU %t/old-new-same.a %t/1.o %t/new/other/1.o
-# RUN: llvm-ar t %t/old-new-same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}}
-
-# RUN: llvm-nm --print-armap %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME-SYM
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol1
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol2
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol3
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbolnew1
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbolnew3
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbolother1
-    Binding: STB_GLOBAL
-    Section: .text
+## Test that the replace command with "u" updates the relevant members.
+
+# RUN: rm -rf %t && mkdir -p %t/new/other
+
+# RUN: yaml2obj %s -o %t/1.o --docnum=1
+# RUN: yaml2obj %s -o %t/2.o --docnum=2
+# RUN: yaml2obj %s -o %t/3.o --docnum=3
+
+# RUN: env TZ=GMT touch -t 200001020304 %t/1.o
+# RUN: env TZ=GMT touch -t 200001020304 %t/2.o
+# RUN: env TZ=GMT touch -t 200001020304 %t/3.o
+
+# RUN: yaml2obj %s -o %t/new/1.o --docnum=4
+# RUN: yaml2obj %s -o %t/new/3.o --docnum=5
+
+# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6
+
+# RUN: env TZ=GMT touch -t 200001020304 %t/new/other/1.o
+
+## Replace single member with newer file:
+# RUN: llvm-ar rcU %t/single.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/single.a %t/new/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
+
+# SINGLE:      1.o
+# SINGLE-NEXT: 2.o
+# SINGLE-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+# SINGLE-SYM:      symbolnew1
+# SINGLE-SYM-NEXT: symbol2
+# SINGLE-SYM-NEXT: symbol3
+
+## Replace new single member with older file:
+# RUN: llvm-ar ruU %t/single.a %t/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+## Replace multiple members with newer files:
+# RUN: llvm-ar rcuU %t/multiple.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/multiple.a %t/new/1.o %t/new/3.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
+
+# MULTIPLE:      1.o
+# MULTIPLE-NEXT: 2.o
+# MULTIPLE-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+# MULTIPLE-SYM:      symbolnew1
+# MULTIPLE-SYM-NEXT: symbol2
+# MULTIPLE-SYM-NEXTs: symbolnew3
+
+## Replace newer members with multiple older files:
+# RUN: llvm-ar ruU %t/multiple.a %t/1.o %t/2.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+## Replace same member with newer files:
+# RUN: llvm-ar rcuU %t/same.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/same.a %t/new/1.o %t/new/other/1.o
+# RUN: llvm-ar t %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}}
+
+# SAME:      1.o
+# SAME-NEXT: 2.o
+# SAME-NEXT: 3.o
+# SAME-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+# SAME-SYM:      symbolnew1
+# SAME-SYM-NEXT: symbol2
+# SAME-SYM-NEXT: symbol3
+# SAME-SYM-NEXT: symbolother1
+
+## Replace multiple members with an older file and a newer file:
+# RUN: llvm-ar rcuU %t/old-new.a %t/new/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/old-new.a %t/1.o %t/new/3.o
+# RUN: llvm-ar t %t/old-new.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
+
+# RUN: llvm-nm --print-armap %t/old-new.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+## Replace same member with an older file and a newer file:
+# RUN: llvm-ar rcuU %t/old-new-same.a %t/new/1.o %t/2.o %t/3.o
+# RUN: llvm-ar ruU %t/old-new-same.a %t/1.o %t/new/other/1.o
+# RUN: llvm-ar t %t/old-new-same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}}
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol3
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolnew1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolnew3
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolother1
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/replace.test b/llvm/test/tools/llvm-ar/replace.test
index 2a7cddfdeb535..0c1722a23ef5a 100644
--- a/llvm/test/tools/llvm-ar/replace.test
+++ b/llvm/test/tools/llvm-ar/replace.test
@@ -1,178 +1,178 @@
-## Test the replace command without modifiers replaces the relevant members.
-
-# RUN: rm -rf %t && mkdir -p %t/new/other
-# RUN: yaml2obj %s -o %t/1.o --docnum=1
-# RUN: yaml2obj %s -o %t/2.o --docnum=2
-# RUN: yaml2obj %s -o %t/3.o --docnum=3
-
-# RUN: yaml2obj %s -o %t/new/1.o --docnum=4
-# RUN: yaml2obj %s -o %t/new/3.o --docnum=5
-
-# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6
-
-## Replace single member:
-# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar r %t/single.a %t/new/1.o
-# RUN: llvm-ar t %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
-
-# SINGLE:      1.o
-# SINGLE-NEXT: 2.o
-# SINGLE-NEXT: 3.o
-
-# RUN: llvm-nm --print-armap %t/single.a \
-# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
-
-# SINGLE-SYM:      symbolnew1
-# SINGLE-SYM-NEXT: symbol2
-# SINGLE-SYM-NEXT: symbol3
-
-## Replace multiple members:
-# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar r %t/multiple.a %t/new/1.o %t/new/3.o
-# RUN: llvm-ar t %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
-
-# MULTIPLE:      1.o
-# MULTIPLE-NEXT: 2.o
-# MULTIPLE-NEXT: 3.o
-
-# RUN: llvm-nm --print-armap %t/multiple.a \
-# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
-
-# MULTIPLE-SYM:      symbolnew1
-# MULTIPLE-SYM-NEXT: symbol2
-# MULTIPLE-SYM-NEXT: symbolnew3
-
-## Replace same member:
-# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar r %t/same.a %t/new/1.o %t/new/other/1.o
-# RUN: llvm-ar t %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME  --implicit-check-not {{.}}
-
-# SAME:      1.o
-# SAME-NEXT: 2.o
-# SAME-NEXT: 3.o
-# SAME-NEXT: 1.o
-
-# RUN: llvm-nm --print-armap %t/same.a \
-# RUN:   | FileCheck %s --check-prefix=SAME-SYM
-
-# SAME-SYM:      symbolnew1
-# SAME-SYM-NEXT: symbol2
-# SAME-SYM-NEXT: symbol3
-# SAME-SYM-NEXT: symbolother1
-
-## Replace without member:
-# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o
-# RUN: llvm-ar r %t/without.a
-# RUN: llvm-ar t %t/without.a \
-# RUN:   | FileCheck %s --check-prefix=WITHOUT --implicit-check-not {{.}}
-
-# WITHOUT:      1.o
-# WITHOUT-NEXT: 2.o
-# WITHOUT-NEXT: 3.o
-
-# RUN: llvm-nm --print-armap %t/without.a \
-# RUN:   | FileCheck %s --check-prefix=WITHOUT-SYM
-
-# WITHOUT-SYM:      symbol1
-# WITHOUT-SYM-NEXT: symbol2
-# WITHOUT-SYM-NEXT: symbol3
-
-## No archive:
-# RUN: not llvm-ar r 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
-
-# NO-ARCHIVE: error: An archive name must be specified.
-
-## Member does not exist:
-# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o
-# RUN: not llvm-ar r %t/missing.a %t/missing.txt 2>&1 \
-# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
-
-# MISSING-FILE: error: [[FILE]]: {{[Nn]}}o such file or directory.
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol1
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol2
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol3
-    Binding: STB_GLOBAL
-    Section: .text
-    
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbolnew1
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbolnew3
-    Binding: STB_GLOBAL
-    Section: .text
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbolother1
-    Binding: STB_GLOBAL
-    Section: .text
+## Test the replace command without modifiers replaces the relevant members.
+
+# RUN: rm -rf %t && mkdir -p %t/new/other
+# RUN: yaml2obj %s -o %t/1.o --docnum=1
+# RUN: yaml2obj %s -o %t/2.o --docnum=2
+# RUN: yaml2obj %s -o %t/3.o --docnum=3
+
+# RUN: yaml2obj %s -o %t/new/1.o --docnum=4
+# RUN: yaml2obj %s -o %t/new/3.o --docnum=5
+
+# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6
+
+## Replace single member:
+# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar r %t/single.a %t/new/1.o
+# RUN: llvm-ar t %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}}
+
+# SINGLE:      1.o
+# SINGLE-NEXT: 2.o
+# SINGLE-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/single.a \
+# RUN:   | FileCheck %s --check-prefix=SINGLE-SYM
+
+# SINGLE-SYM:      symbolnew1
+# SINGLE-SYM-NEXT: symbol2
+# SINGLE-SYM-NEXT: symbol3
+
+## Replace multiple members:
+# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar r %t/multiple.a %t/new/1.o %t/new/3.o
+# RUN: llvm-ar t %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}}
+
+# MULTIPLE:      1.o
+# MULTIPLE-NEXT: 2.o
+# MULTIPLE-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/multiple.a \
+# RUN:   | FileCheck %s --check-prefix=MULTIPLE-SYM
+
+# MULTIPLE-SYM:      symbolnew1
+# MULTIPLE-SYM-NEXT: symbol2
+# MULTIPLE-SYM-NEXT: symbolnew3
+
+## Replace same member:
+# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar r %t/same.a %t/new/1.o %t/new/other/1.o
+# RUN: llvm-ar t %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME  --implicit-check-not {{.}}
+
+# SAME:      1.o
+# SAME-NEXT: 2.o
+# SAME-NEXT: 3.o
+# SAME-NEXT: 1.o
+
+# RUN: llvm-nm --print-armap %t/same.a \
+# RUN:   | FileCheck %s --check-prefix=SAME-SYM
+
+# SAME-SYM:      symbolnew1
+# SAME-SYM-NEXT: symbol2
+# SAME-SYM-NEXT: symbol3
+# SAME-SYM-NEXT: symbolother1
+
+## Replace without member:
+# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o
+# RUN: llvm-ar r %t/without.a
+# RUN: llvm-ar t %t/without.a \
+# RUN:   | FileCheck %s --check-prefix=WITHOUT --implicit-check-not {{.}}
+
+# WITHOUT:      1.o
+# WITHOUT-NEXT: 2.o
+# WITHOUT-NEXT: 3.o
+
+# RUN: llvm-nm --print-armap %t/without.a \
+# RUN:   | FileCheck %s --check-prefix=WITHOUT-SYM
+
+# WITHOUT-SYM:      symbol1
+# WITHOUT-SYM-NEXT: symbol2
+# WITHOUT-SYM-NEXT: symbol3
+
+## No archive:
+# RUN: not llvm-ar r 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
+
+# NO-ARCHIVE: error: An archive name must be specified.
+
+## Member does not exist:
+# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o
+# RUN: not llvm-ar r %t/missing.a %t/missing.txt 2>&1 \
+# RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt
+
+# MISSING-FILE: error: [[FILE]]: {{[Nn]}}o such file or directory.
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol2
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol3
+    Binding: STB_GLOBAL
+    Section: .text
+    
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolnew1
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolnew3
+    Binding: STB_GLOBAL
+    Section: .text
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbolother1
+    Binding: STB_GLOBAL
+    Section: .text
diff --git a/llvm/test/tools/llvm-ar/symtab.test b/llvm/test/tools/llvm-ar/symtab.test
index e59a468d3f2f0..8dde4ede58135 100644
--- a/llvm/test/tools/llvm-ar/symtab.test
+++ b/llvm/test/tools/llvm-ar/symtab.test
@@ -1,70 +1,70 @@
-## Test the s and S modifiers. Build and do not build a symbol table.
-
-# RUN: yaml2obj %s -o %t.o
-# RUN: touch %t-other.txt
-
-## Default:
-# RUN: rm -f %t-default.a
-# RUN: llvm-ar rc %t-default.a %t.o
-# RUN: llvm-nm --print-armap %t-default.a \
-# RUN:   | FileCheck %s --check-prefix=SYMTAB
-
-## Use a modifer:
-# RUN: rm -f %t-symtab.a
-# RUN: llvm-ar rcs %t-symtab.a %t.o
-# RUN: llvm-nm --print-armap %t-symtab.a \
-# RUN:   | FileCheck %s --check-prefix=SYMTAB
-
-# RUN: rm -f %t-no-symtab.a
-# RUN: llvm-ar rcS %t-no-symtab.a %t.o
-# RUN: llvm-nm --print-armap %t-no-symtab.a \
-# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
-
-## Use both modifers:
-# RUN: rm -f %t-symtab-last.a
-# RUN: llvm-ar rcSs %t-symtab-last.a %t.o
-# RUN: llvm-nm --print-armap %t-symtab-last.a \
-# RUN:   | FileCheck %s --check-prefix=SYMTAB
-
-# RUN: rm -f %t-no-symtab-last.a
-# RUN: llvm-ar rcsS %t-no-symtab-last.a %t.o
-# RUN: llvm-nm --print-armap %t-no-symtab-last.a \
-# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
-
-## Use an existing archive:
-# RUN: rm -f %t-to-symtab.a
-# RUN: llvm-ar rcS %t-to-symtab.a %t.o
-# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt
-# RUN: llvm-nm --print-armap %t-to-symtab.a \
-# RUN:   | FileCheck %s --check-prefix=SYMTAB
-
-# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt
-# RUN: llvm-nm --print-armap %t-to-symtab.a \
-# RUN:   | FileCheck %s --check-prefix=SYMTAB
-
-# RUN: rm -f %t-to-no-symtab.a
-# RUN: llvm-ar rcs %t-to-no-symtab.a %t.o
-# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt
-# RUN: llvm-nm --print-armap %t-to-no-symtab.a \
-# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
-
-# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt
-# RUN: llvm-nm --print-armap %t-to-no-symtab.a \
-# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
-
-# SYMTAB:        symbol in
-# NO-SYMTAB-NOT: symbol in
-
---- !ELF
-FileHeader:
-  Class:   ELFCLASS64
-  Data:    ELFDATA2LSB
-  Type:    ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-Symbols:
-  - Name:    symbol
-    Binding: STB_GLOBAL
-    Section: .text
+## Test the s and S modifiers. Build and do not build a symbol table.
+
+# RUN: yaml2obj %s -o %t.o
+# RUN: touch %t-other.txt
+
+## Default:
+# RUN: rm -f %t-default.a
+# RUN: llvm-ar rc %t-default.a %t.o
+# RUN: llvm-nm --print-armap %t-default.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+## Use a modifer:
+# RUN: rm -f %t-symtab.a
+# RUN: llvm-ar rcs %t-symtab.a %t.o
+# RUN: llvm-nm --print-armap %t-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+# RUN: rm -f %t-no-symtab.a
+# RUN: llvm-ar rcS %t-no-symtab.a %t.o
+# RUN: llvm-nm --print-armap %t-no-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
+
+## Use both modifers:
+# RUN: rm -f %t-symtab-last.a
+# RUN: llvm-ar rcSs %t-symtab-last.a %t.o
+# RUN: llvm-nm --print-armap %t-symtab-last.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+# RUN: rm -f %t-no-symtab-last.a
+# RUN: llvm-ar rcsS %t-no-symtab-last.a %t.o
+# RUN: llvm-nm --print-armap %t-no-symtab-last.a \
+# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
+
+## Use an existing archive:
+# RUN: rm -f %t-to-symtab.a
+# RUN: llvm-ar rcS %t-to-symtab.a %t.o
+# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt
+# RUN: llvm-nm --print-armap %t-to-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt
+# RUN: llvm-nm --print-armap %t-to-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=SYMTAB
+
+# RUN: rm -f %t-to-no-symtab.a
+# RUN: llvm-ar rcs %t-to-no-symtab.a %t.o
+# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt
+# RUN: llvm-nm --print-armap %t-to-no-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
+
+# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt
+# RUN: llvm-nm --print-armap %t-to-no-symtab.a \
+# RUN:   | FileCheck %s --check-prefix=NO-SYMTAB
+
+# SYMTAB:        symbol in
+# NO-SYMTAB-NOT: symbol in
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name: .text
+    Type: SHT_PROGBITS
+Symbols:
+  - Name:    symbol
+    Binding: STB_GLOBAL
+    Section: .text

From aefcf5100aae57ed2ff6a15356bd25d74e8fb27e Mon Sep 17 00:00:00 2001
From: Gabor Marton <gabor.marton@ericsson.com>
Date: Wed, 17 Jul 2019 13:47:46 +0000
Subject: [PATCH 343/451] [ASTImporter] Fix LLDB lookup in transparent ctx and
 with ext src

Summary:
With LLDB we use localUncachedLookup(), however, that fails to find
Decls when a transparent context is involved and the given DC has
external lexical storage.  The solution is to use noload_lookup, which
works well with transparent contexts.  But, we cannot use only the
noload_lookup since the slow case of localUncachedLookup is still needed
in some other cases.

These other cases are handled in ASTImporterLookupTable, but we cannot
use that with LLDB since that traverses through the AST which initiates
the load of external decls again via DC::decls().

We must avoid loading external decls during the import because
ExternalASTSource is implemented with ASTImporter, so external loads
during import result in an uncontrolled and faulty import.

Reviewers: shafik, teemperor, jingham, clayborg, a_sidorin, a.sidorin

Subscribers: rnkovacs, dkrupp, Szelethus, gamesh411, cfe-commits, lldb-commits

Tags: #clang, #lldb

Differential Revision: https://reviews.llvm.org/D61333

llvm-svn: 366325
---
 clang/lib/AST/ASTImporter.cpp                 | 35 +++++++++++---
 clang/unittests/AST/ASTImporterTest.cpp       | 48 +++++++++++++++++++
 .../test/lang/c/modules/TestCModules.py       | 14 ++++++
 .../lldbsuite/test/lang/c/modules/main.c      |  4 +-
 .../ExpressionParser/Clang/ClangASTSource.cpp | 30 +++++++-----
 5 files changed, 110 insertions(+), 21 deletions(-)

diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 8a59c3a76990d..9d5dd84161dec 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -1707,6 +1707,17 @@ static Error setTypedefNameForAnonDecl(TagDecl *From, TagDecl *To,
 
 Error ASTNodeImporter::ImportDefinition(
     RecordDecl *From, RecordDecl *To, ImportDefinitionKind Kind) {
+  auto DefinitionCompleter = [To]() {
+    // There are cases in LLDB when we first import a class without its
+    // members. The class will have DefinitionData, but no members. Then,
+    // importDefinition is called from LLDB, which tries to get the members, so
+    // when we get here, the class already has the DefinitionData set, so we
+    // must unset the CompleteDefinition here to be able to complete again the
+    // definition.
+    To->setCompleteDefinition(false);
+    To->completeDefinition();
+  };
+
   if (To->getDefinition() || To->isBeingDefined()) {
     if (Kind == IDK_Everything ||
         // In case of lambdas, the class already has a definition ptr set, but
@@ -1717,7 +1728,7 @@ Error ASTNodeImporter::ImportDefinition(
       Error Result = ImportDeclContext(From, /*ForceImport=*/true);
       // Finish the definition of the lambda, set isBeingDefined to false.
       if (To->isLambda())
-        To->completeDefinition();
+        DefinitionCompleter();
       return Result;
     }
 
@@ -1728,8 +1739,8 @@ Error ASTNodeImporter::ImportDefinition(
   // Complete the definition even if error is returned.
   // The RecordDecl may be already part of the AST so it is better to
   // have it in complete state even if something is wrong with it.
-  auto DefinitionCompleter =
-      llvm::make_scope_exit([To]() { To->completeDefinition(); });
+  auto DefinitionCompleterScopeExit =
+      llvm::make_scope_exit(DefinitionCompleter);
 
   if (Error Err = setTypedefNameForAnonDecl(From, To, Importer))
     return Err;
@@ -7757,10 +7768,20 @@ ASTImporter::findDeclsInToCtx(DeclContext *DC, DeclarationName Name) {
         SharedState->getLookupTable()->lookup(ReDC, Name);
     return FoundDeclsTy(LookupResult.begin(), LookupResult.end());
   } else {
-    // FIXME Can we remove this kind of lookup?
-    // Or lldb really needs this C/C++ lookup?
-    FoundDeclsTy Result;
-    ReDC->localUncachedLookup(Name, Result);
+    DeclContext::lookup_result NoloadLookupResult = ReDC->noload_lookup(Name);
+    FoundDeclsTy Result(NoloadLookupResult.begin(), NoloadLookupResult.end());
+    // We must search by the slow case of localUncachedLookup because that is
+    // working even if there is no LookupPtr for the DC. We could use
+    // DC::buildLookup() to create the LookupPtr, but that would load external
+    // decls again, we must avoid that case.
+    // Also, even if we had the LookupPtr, we must find Decls which are not
+    // in the LookupPtr, so we need the slow case.
+    // These cases are handled in ASTImporterLookupTable, but we cannot use
+    // that with LLDB since that traverses through the AST which initiates the
+    // load of external decls again via DC::decls().  And again, we must avoid
+    // loading external decls during the import.
+    if (Result.empty())
+      ReDC->localUncachedLookup(Name, Result);
     return Result;
   }
 }
diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp
index 6b8315f2b9ead..8b2f7c5b240e4 100644
--- a/clang/unittests/AST/ASTImporterTest.cpp
+++ b/clang/unittests/AST/ASTImporterTest.cpp
@@ -5122,6 +5122,51 @@ TEST_P(ASTImporterOptionSpecificTestBase, LambdaInFunctionParam) {
   EXPECT_EQ(ToLSize, FromLSize);
 }
 
+struct LLDBLookupTest : ASTImporterOptionSpecificTestBase {
+  LLDBLookupTest() {
+    Creator = [](ASTContext &ToContext, FileManager &ToFileManager,
+                 ASTContext &FromContext, FileManager &FromFileManager,
+                 bool MinimalImport,
+                 const std::shared_ptr<ASTImporterSharedState> &SharedState) {
+      return new ASTImporter(ToContext, ToFileManager, FromContext,
+                             FromFileManager, MinimalImport,
+                             // We use the regular lookup.
+                             /*SharedState=*/nullptr);
+    };
+  }
+};
+
+TEST_P(LLDBLookupTest, ImporterShouldFindInTransparentContext) {
+  TranslationUnitDecl *ToTU = getToTuDecl(
+      R"(
+      extern "C" {
+        class X{};
+      };
+      )",
+      Lang_CXX);
+  auto *ToX = FirstDeclMatcher<CXXRecordDecl>().match(
+      ToTU, cxxRecordDecl(hasName("X")));
+
+  // Set up a stub external storage.
+  ToTU->setHasExternalLexicalStorage(true);
+  // Set up DeclContextBits.HasLazyExternalLexicalLookups to true.
+  ToTU->setMustBuildLookupTable();
+  struct TestExternalASTSource : ExternalASTSource {};
+  ToTU->getASTContext().setExternalSource(new TestExternalASTSource());
+
+  Decl *FromTU = getTuDecl(
+      R"(
+        class X;
+      )",
+      Lang_CXX);
+  auto *FromX = FirstDeclMatcher<CXXRecordDecl>().match(
+      FromTU, cxxRecordDecl(hasName("X")));
+  auto *ImportedX = Import(FromX, Lang_CXX);
+  // The lookup must find the existing class definition in the LinkageSpecDecl.
+  // Then the importer renders the existing and the new decl into one chain.
+  EXPECT_EQ(ImportedX->getCanonicalDecl(), ToX->getCanonicalDecl());
+}
+
 INSTANTIATE_TEST_CASE_P(ParameterizedTests, ASTImporterLookupTableTest,
                         DefaultTestValuesForRunOptions, );
 
@@ -5168,5 +5213,8 @@ INSTANTIATE_TEST_CASE_P(ParameterizedTests, ImportImplicitMethods,
 INSTANTIATE_TEST_CASE_P(ParameterizedTests, ImportVariables,
                         DefaultTestValuesForRunOptions, );
 
+INSTANTIATE_TEST_CASE_P(ParameterizedTests, LLDBLookupTest,
+                        DefaultTestValuesForRunOptions, );
+
 } // end namespace ast_matchers
 } // end namespace clang
diff --git a/lldb/packages/Python/lldbsuite/test/lang/c/modules/TestCModules.py b/lldb/packages/Python/lldbsuite/test/lang/c/modules/TestCModules.py
index 455704280d143..857223b5ed109 100644
--- a/lldb/packages/Python/lldbsuite/test/lang/c/modules/TestCModules.py
+++ b/lldb/packages/Python/lldbsuite/test/lang/c/modules/TestCModules.py
@@ -47,6 +47,10 @@ def test_expr(self):
         self.expect("breakpoint list -f", BREAKPOINT_HIT_ONCE,
                     substrs=[' resolved, hit count = 1'])
 
+        # Enable logging of the imported AST.
+        log_file = os.path.join(self.getBuildDir(), "lldb-ast-log.txt")
+        self.runCmd("log enable lldb ast -f '%s'" % log_file)
+
         self.expect(
             "expr -l objc++ -- @import Darwin; 3",
             VARIABLES_DISPLAYED_CORRECTLY,
@@ -54,6 +58,8 @@ def test_expr(self):
                 "int",
                 "3"])
 
+        # This expr command imports __sFILE with definition
+        # (FILE is a typedef to __sFILE.)
         self.expect(
             "expr *fopen(\"/dev/zero\", \"w\")",
             VARIABLES_DISPLAYED_CORRECTLY,
@@ -61,6 +67,14 @@ def test_expr(self):
                 "FILE",
                 "_close"])
 
+        # Check that the AST log contains exactly one definition of __sFILE.
+        f = open(log_file)
+        log_lines = f.readlines()
+        f.close()
+        os.remove(log_file)
+        self.assertEqual(" ".join(log_lines).count("struct __sFILE definition"),
+                         1)
+
         self.expect("expr *myFile", VARIABLES_DISPLAYED_CORRECTLY,
                     substrs=["a", "5", "b", "9"])
 
diff --git a/lldb/packages/Python/lldbsuite/test/lang/c/modules/main.c b/lldb/packages/Python/lldbsuite/test/lang/c/modules/main.c
index 2b244bc38d02f..df321a75faaf1 100644
--- a/lldb/packages/Python/lldbsuite/test/lang/c/modules/main.c
+++ b/lldb/packages/Python/lldbsuite/test/lang/c/modules/main.c
@@ -5,11 +5,11 @@ int printf(const char * __restrict format, ...);
 typedef struct {
     int a;
     int b;
-} FILE;
+} MYFILE;
 
 int main()
 {
-    FILE *myFile = malloc(sizeof(FILE));
+    MYFILE *myFile = malloc(sizeof(MYFILE));
 
     myFile->a = 5;
     myFile->b = 9;
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
index 8d29df9dde2db..c5778f86bb62d 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp
@@ -612,10 +612,15 @@ void ClangASTSource::FindExternalLexicalDecls(
   if (!original_decl_context)
     return;
 
+  // Indicates whether we skipped any Decls of the original DeclContext.
+  bool SkippedDecls = false;
   for (TagDecl::decl_iterator iter = original_decl_context->decls_begin();
        iter != original_decl_context->decls_end(); ++iter) {
     Decl *decl = *iter;
 
+    // The predicate function returns true if the passed declaration kind is
+    // the one we are looking for.
+    // See clang::ExternalASTSource::FindExternalLexicalDecls()
     if (predicate(decl->getKind())) {
       if (log) {
         ASTDumper ast_dumper(decl);
@@ -640,21 +645,22 @@ void ClangASTSource::FindExternalLexicalDecls(
 
         m_ast_importer_sp->RequireCompleteType(copied_field_type);
       }
-
-      DeclContext *decl_context_non_const =
-          const_cast<DeclContext *>(decl_context);
-
-      if (copied_decl->getDeclContext() != decl_context) {
-        if (copied_decl->getDeclContext()->containsDecl(copied_decl))
-          copied_decl->getDeclContext()->removeDecl(copied_decl);
-        copied_decl->setDeclContext(decl_context_non_const);
-      }
-
-      if (!decl_context_non_const->containsDecl(copied_decl))
-        decl_context_non_const->addDeclInternal(copied_decl);
+    } else {
+      SkippedDecls = true;
     }
   }
 
+  // CopyDecl may build a lookup table which may set up ExternalLexicalStorage
+  // to false.  However, since we skipped some of the external Decls we must
+  // set it back!
+  if (SkippedDecls) {
+    decl_context->setHasExternalLexicalStorage(true);
+    // This sets HasLazyExternalLexicalLookups to true.  By setting this bit we
+    // ensure that the lookup table is rebuilt, which means the external source
+    // is consulted again when a clang::DeclContext::lookup is called.
+    const_cast<DeclContext *>(decl_context)->setMustBuildLookupTable();
+  }
+
   return;
 }
 

From aa55124eca680e164956c9654a8f2a2fc60b1146 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Wed, 17 Jul 2019 13:48:49 +0000
Subject: [PATCH 344/451] [RISCV][NFC] Add tests that capture current encodings
 for DWARF EH

Items which are known to be wrong/different vs GCC are marked as TODO and will
be addressed in follow-up patches.

llvm-svn: 366326
---
 llvm/test/CodeGen/RISCV/dwarf-eh.ll  | 68 ++++++++++++++++++++++++++++
 llvm/test/DebugInfo/RISCV/eh-frame.s | 34 ++++++++++++++
 llvm/test/MC/RISCV/fde-reloc.s       | 31 +++++++++++++
 3 files changed, 133 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/dwarf-eh.ll
 create mode 100644 llvm/test/DebugInfo/RISCV/eh-frame.s
 create mode 100644 llvm/test/MC/RISCV/fde-reloc.s

diff --git a/llvm/test/CodeGen/RISCV/dwarf-eh.ll b/llvm/test/CodeGen/RISCV/dwarf-eh.ll
new file mode 100644
index 0000000000000..6abea5c2a655f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/dwarf-eh.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=riscv32 --code-model=small  < %s \
+; RUN:     | FileCheck --check-prefixes=CHECK,SMALL %s
+; RUN: llc -march=riscv32 --code-model=medium < %s \
+; RUN:     | FileCheck --check-prefixes=CHECK,MED   %s
+; RUN: llc -march=riscv32 --code-model=small  -relocation-model=pic < %s \
+; RUN:     | FileCheck --check-prefixes=CHECK,PIC %s
+; RUN: llc -march=riscv32 --code-model=medium -relocation-model=pic < %s \
+; RUN:     | FileCheck --check-prefixes=CHECK,PIC %s
+; RUN: llc -march=riscv64 --code-model=small  < %s \
+; RUN:     | FileCheck --check-prefixes=CHECK,SMALL %s
+; RUN: llc -march=riscv64 --code-model=medium < %s \
+; RUN:     | FileCheck --check-prefixes=CHECK,MED   %s
+; RUN: llc -march=riscv64 --code-model=small  -relocation-model=pic < %s \
+; RUN:     | FileCheck --check-prefixes=CHECK,PIC %s
+; RUN: llc -march=riscv64 --code-model=medium -relocation-model=pic < %s \
+; RUN:     | FileCheck --check-prefixes=CHECK,PIC %s
+
+declare void @throw_exception()
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+; CHECK-LABEL: test1:
+; CHECK: .cfi_startproc
+; TODO: Personality encoding should be DW_EH_PE_indirect | DW_EH_PE_pcrel |
+; DW_EH_PE_sdata4
+; CHECK-NEXT:	.cfi_personality 0, __gxx_personality_v0
+; TODO: LSDA encoding should be DW_EH_PE_pcrel | DW_EH_PE_sdata4
+; CHECK-NEXT:	.cfi_lsda 0, .Lexception0
+
+define void @test1() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @throw_exception() to label %try.cont unwind label %lpad
+
+lpad:
+  %0 = landingpad { i8*, i32 }
+          catch i8* null
+  %1 = extractvalue { i8*, i32 } %0, 0
+  %2 = tail call i8* @__cxa_begin_catch(i8* %1)
+  tail call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:
+  ret void
+}
+
+; CHECK-LABEL: GCC_except_table0:
+; CHECK-NEXT: .Lexception0:
+; CHECK-NEXT: .byte	255 # @LPStart Encoding = omit
+; TODO: TTypeEncoding encoding should be DW_EH_PE_indirect | DW_EH_PE_pcrel |
+; DW_EH_PE_sdata4
+; CHECK-NEXT: .byte 0 # @TType Encoding = absptr
+; TODO: call site encoding should be DW_EH_PE_udata4
+; CHECK: .Lttbaseref0:
+; CHECK-NEXT: .byte	1                       # Call site Encoding = uleb128
+; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0
+; CHECK-NEXT: cst_begin0:
+; CHECK-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0   # >> Call Site 1 <<
+; CHECK-NEXT: .uleb128 .Ltmp1-.Ltmp0          #   Call between .Ltmp0 and .Ltmp1
+; CHECK-NEXT: .uleb128 .Ltmp2-.Lfunc_begin0   #     jumps to .Ltmp2
+; CHECK-NEXT: .byte	1                       #   On action: 1
+; CHECK-NEXT: .uleb128 .Ltmp1-.Lfunc_begin0   # >> Call Site 2 <<
+; CHECK-NEXT: .uleb128 .Lfunc_end0-.Ltmp1     #   Call between .Ltmp1 and .Lfunc_end0
+; CHECK-NEXT: .byte	0                       #     has no landing pad
+; CHECK-NEXT: .byte	0                       #   On action: cleanup
diff --git a/llvm/test/DebugInfo/RISCV/eh-frame.s b/llvm/test/DebugInfo/RISCV/eh-frame.s
new file mode 100644
index 0000000000000..fcff68c09085e
--- /dev/null
+++ b/llvm/test/DebugInfo/RISCV/eh-frame.s
@@ -0,0 +1,34 @@
+# RUN: llvm-mc -filetype=obj -triple=riscv32 < %s | llvm-dwarfdump -eh-frame - \
+# RUN:    | FileCheck --check-prefixes=CHECK,RV32 %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 < %s | llvm-dwarfdump -eh-frame - \
+# RUN:    | FileCheck --check-prefixes=CHECK,RV64 %s
+
+func:
+  .cfi_startproc
+  ret
+  .cfi_endproc
+
+# CHECK: 00000000 00000010 ffffffff CIE
+# CHECK:   Version:               1
+# CHECK:   Augmentation:          "zR"
+# CHECK:   Code alignment factor: 1
+
+# TODO: gas uses -4 for the data alignment factor for both RV32 and RV64. They
+# do so on the basis that on RV64F, F registers may only be 4 bytes
+# (DWARF2_CIE_DATA_ALIGNMENT).
+
+# RV32:    Data alignment factor: -4
+# RV64:    Data alignment factor: -8
+
+# CHECK:   Return address column: 1
+
+# Check the pointer encoding for address pointers used in FDE. This is set by
+# FDECFIEncoding and should be DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b).
+
+# CHECK:   Augmentation data:     1B
+# CHECK:   DW_CFA_def_cfa: reg2 +0
+#
+# CHECK: 00000014 00000010 00000018 FDE cie=00000018 pc=00000000...00000004
+# CHECK:   DW_CFA_nop:
+# CHECK:   DW_CFA_nop:
+# CHECK:   DW_CFA_nop:
diff --git a/llvm/test/MC/RISCV/fde-reloc.s b/llvm/test/MC/RISCV/fde-reloc.s
new file mode 100644
index 0000000000000..0794ad4d22e74
--- /dev/null
+++ b/llvm/test/MC/RISCV/fde-reloc.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+relax < %s \
+# RUN:     | llvm-readobj -r | FileCheck -check-prefix=RELAX-RELOC %s
+# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=-relax < %s \
+# RUN:     | llvm-readobj -r | FileCheck -check-prefix=NORELAX-RELOC %s
+
+func:
+	.cfi_startproc
+  ret
+	.cfi_endproc
+
+# TODO: Should produce R_RISCV_32_PCREL for the FDE pc relocation. Many of the
+# ADD32/SUB32 relocations also can be safely resolved even with linker
+# relaxation enabled. This test is written to capture current behaviour, in
+# preparation for follow-on patches to fix it.
+
+# RELAX-RELOC:   Section (4) .rela.eh_frame {
+# RELAX-RELOC-NEXT:   0x0 R_RISCV_ADD32 - 0xFFFFFFFC
+# RELAX-RELOC-NEXT:   0x0 R_RISCV_SUB32 - 0x0
+# RELAX-RELOC-NEXT:   0x14 R_RISCV_ADD32 - 0x0
+# RELAX-RELOC-NEXT:   0x14 R_RISCV_SUB32 - 0x0
+# RELAX-RELOC-NEXT:   0x18 R_RISCV_ADD32 - 0x0
+# RELAX-RELOC-NEXT:   0x18 R_RISCV_SUB32 - 0x0
+# RELAX-RELOC-NEXT:   0x1C R_RISCV_ADD32 - 0x0
+# RELAX-RELOC-NEXT:   0x1C R_RISCV_SUB32 - 0x0
+# RELAX-RELOC-NEXT:   0x20 R_RISCV_ADD32 - 0x0
+# RELAX-RELOC-NEXT:   0x20 R_RISCV_SUB32 - 0x0
+# RELAX-RELOC-NEXT: }
+
+# NORELAX-RELOC:        Section (4) .rela.eh_frame {
+# NORELAX-RELOC-NEXT:    0x1C R_RISCV_32_PCREL - 0x0
+# NORELAX-RELOC-NEXT:  }

From b94c233d06731b09d842ed86c5a72c44b40c65bb Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Wed, 17 Jul 2019 13:54:38 +0000
Subject: [PATCH 345/451] [RISCV] Set correct encodings for DWARF exception
 handling

This patch sets correct encodings for DWARF exception handling for RISC-V
(other than call site encoding, which must be udata4 rather than uleb128 and
is handled by D63415).

This has the same intent as D63409, except this version matches GCC/binutils
behaviour which uses the same encodings regardless of PIC/non-PIC and
medlow/medany code model.

llvm-svn: 366327
---
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  8 +++++
 llvm/test/CodeGen/RISCV/dwarf-eh.ll           | 30 +++++++++----------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 4c18152ea8d81..d8e6b3ef93a34 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -219,6 +219,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
       TTypeEncoding = dwarf::DW_EH_PE_absptr;
     }
     break;
+  case Triple::riscv32:
+  case Triple::riscv64:
+    LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+    PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+                          dwarf::DW_EH_PE_sdata4;
+    TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+                    dwarf::DW_EH_PE_sdata4;
+    break;
   case Triple::sparcv9:
     LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
     if (isPositionIndependent()) {
diff --git a/llvm/test/CodeGen/RISCV/dwarf-eh.ll b/llvm/test/CodeGen/RISCV/dwarf-eh.ll
index 6abea5c2a655f..67236d7da668e 100644
--- a/llvm/test/CodeGen/RISCV/dwarf-eh.ll
+++ b/llvm/test/CodeGen/RISCV/dwarf-eh.ll
@@ -1,19 +1,19 @@
 ; RUN: llc -march=riscv32 --code-model=small  < %s \
-; RUN:     | FileCheck --check-prefixes=CHECK,SMALL %s
+; RUN:     | FileCheck %s
 ; RUN: llc -march=riscv32 --code-model=medium < %s \
-; RUN:     | FileCheck --check-prefixes=CHECK,MED   %s
+; RUN:     | FileCheck %s
 ; RUN: llc -march=riscv32 --code-model=small  -relocation-model=pic < %s \
-; RUN:     | FileCheck --check-prefixes=CHECK,PIC %s
+; RUN:     | FileCheck %s
 ; RUN: llc -march=riscv32 --code-model=medium -relocation-model=pic < %s \
-; RUN:     | FileCheck --check-prefixes=CHECK,PIC %s
+; RUN:     | FileCheck %s
 ; RUN: llc -march=riscv64 --code-model=small  < %s \
-; RUN:     | FileCheck --check-prefixes=CHECK,SMALL %s
+; RUN:     | FileCheck %s
 ; RUN: llc -march=riscv64 --code-model=medium < %s \
-; RUN:     | FileCheck --check-prefixes=CHECK,MED   %s
+; RUN:     | FileCheck %s
 ; RUN: llc -march=riscv64 --code-model=small  -relocation-model=pic < %s \
-; RUN:     | FileCheck --check-prefixes=CHECK,PIC %s
+; RUN:     | FileCheck %s
 ; RUN: llc -march=riscv64 --code-model=medium -relocation-model=pic < %s \
-; RUN:     | FileCheck --check-prefixes=CHECK,PIC %s
+; RUN:     | FileCheck %s
 
 declare void @throw_exception()
 
@@ -25,11 +25,10 @@ declare void @__cxa_end_catch()
 
 ; CHECK-LABEL: test1:
 ; CHECK: .cfi_startproc
-; TODO: Personality encoding should be DW_EH_PE_indirect | DW_EH_PE_pcrel |
-; DW_EH_PE_sdata4
-; CHECK-NEXT:	.cfi_personality 0, __gxx_personality_v0
-; TODO: LSDA encoding should be DW_EH_PE_pcrel | DW_EH_PE_sdata4
-; CHECK-NEXT:	.cfi_lsda 0, .Lexception0
+; PersonalityEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4
+; CHECK-NEXT:	.cfi_personality 155, DW.ref.__gxx_personality_v0
+; LSDAEncoding = DW_EH_PE_pcrel | DW_EH_PE_sdata4
+; CHECK-NEXT:	.cfi_lsda 27, .Lexception0
 
 define void @test1() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 entry:
@@ -50,9 +49,8 @@ try.cont:
 ; CHECK-LABEL: GCC_except_table0:
 ; CHECK-NEXT: .Lexception0:
 ; CHECK-NEXT: .byte	255 # @LPStart Encoding = omit
-; TODO: TTypeEncoding encoding should be DW_EH_PE_indirect | DW_EH_PE_pcrel |
-; DW_EH_PE_sdata4
-; CHECK-NEXT: .byte 0 # @TType Encoding = absptr
+; TTypeEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4
+; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4
 ; TODO: call site encoding should be DW_EH_PE_udata4
 ; CHECK: .Lttbaseref0:
 ; CHECK-NEXT: .byte	1                       # Call site Encoding = uleb128

From 6584c4842fc3f5b6786114eb2bb1b7266beef0ee Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 17 Jul 2019 13:55:01 +0000
Subject: [PATCH 346/451] Mips: Remove immarg from copy and insert intrinsics

These intrinsics do in fact work with non-constant index arguments.

These are lowered to either the generic
ISD::INSERT_VECTOR_ELT/ISD::EXTRACT_VECTOR_ELT, or to
VEXTRACT_SEXT_ELT. The handling for all of these accepts variable
indexes. Turning these into generic instructions which do allow
variables introduces complications in a future change to immarg
handling.

Since these just turn into generic instructions, these are kind of
pointless and should probably just be autoupgraded to
extractelement/insertelement.

llvm-svn: 366328
---
 llvm/include/llvm/IR/IntrinsicsMips.td | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsMips.td b/llvm/include/llvm/IR/IntrinsicsMips.td
index 308bec9128993..6393a9ca35d55 100644
--- a/llvm/include/llvm/IR/IntrinsicsMips.td
+++ b/llvm/include/llvm/IR/IntrinsicsMips.td
@@ -798,22 +798,22 @@ def int_mips_clti_u_d : GCCBuiltin<"__builtin_msa_clti_u_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
 
 def int_mips_copy_s_b : GCCBuiltin<"__builtin_msa_copy_s_b">,
-  Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
 def int_mips_copy_s_h : GCCBuiltin<"__builtin_msa_copy_s_h">,
-  Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
 def int_mips_copy_s_w : GCCBuiltin<"__builtin_msa_copy_s_w">,
-  Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
 def int_mips_copy_s_d : GCCBuiltin<"__builtin_msa_copy_s_d">,
-  Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
+  Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
 
 def int_mips_copy_u_b : GCCBuiltin<"__builtin_msa_copy_u_b">,
-  Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
 def int_mips_copy_u_h : GCCBuiltin<"__builtin_msa_copy_u_h">,
-  Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>;
 def int_mips_copy_u_w : GCCBuiltin<"__builtin_msa_copy_u_w">,
-  Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>;
 def int_mips_copy_u_d : GCCBuiltin<"__builtin_msa_copy_u_d">,
-  Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>;
+  Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
 
 def int_mips_ctcmsa : GCCBuiltin<"__builtin_msa_ctcmsa">,
   Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
@@ -1230,16 +1230,16 @@ def int_mips_ilvr_d : GCCBuiltin<"__builtin_msa_ilvr_d">,
 
 def int_mips_insert_b : GCCBuiltin<"__builtin_msa_insert_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, ImmArg<1>]>;
+  [IntrNoMem]>;
 def int_mips_insert_h : GCCBuiltin<"__builtin_msa_insert_h">,
   Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, ImmArg<1>]>;
+  [IntrNoMem]>;
 def int_mips_insert_w : GCCBuiltin<"__builtin_msa_insert_w">,
   Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty],
-  [IntrNoMem, ImmArg<1>]>;
+  [IntrNoMem]>;
 def int_mips_insert_d : GCCBuiltin<"__builtin_msa_insert_d">,
   Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty, llvm_i64_ty],
-  [IntrNoMem, ImmArg<1>]>;
+  [IntrNoMem]>;
 
 def int_mips_insve_b : GCCBuiltin<"__builtin_msa_insve_b">,
   Intrinsic<[llvm_v16i8_ty],

From ab009a602e96b238000d9e20e5c54b078d08aad3 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Wed, 17 Jul 2019 14:00:35 +0000
Subject: [PATCH 347/451] [AsmPrinter] Make the encoding of call sites in
 .gcc_except_table configurable and use for RISC-V

The original behavior was to always emit the offsets to each call site in the
call site table as uleb128 values, however on some architectures (eg RISCV)
these uleb128 offsets into the code cannot always be resolved until link time
(because relaxation will invalidate any calculated offsets), and there are no
appropriate relocations for uleb128 values. As a consequence it needs to be
possible to specify an alternative.

This also switches RISCV to use DW_EH_PE_udata4 for call site encodings in
.gcc_except_table

Differential Revision: https://reviews.llvm.org/D63415
Patch by Edward Jones.

llvm-svn: 366329
---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |  6 ++++++
 .../llvm/Target/TargetLoweringObjectFile.h    |  2 ++
 .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp    | 19 +++++++++++++++++++
 llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp    | 13 +++++++------
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  2 ++
 llvm/lib/Target/TargetLoweringObjectFile.cpp  |  1 +
 llvm/test/CodeGen/RISCV/dwarf-eh.ll           | 14 +++++++-------
 7 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 9c2097b299520..d110f8b01cb5b 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -543,6 +543,12 @@ class AsmPrinter : public MachineFunctionPass {
     emitDwarfStringOffset(S.getEntry());
   }
 
+  /// Emit reference to a call site with a specified encoding
+  void EmitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
+                          unsigned Encoding) const;
+  /// Emit an integer value corresponding to the call site encoding
+  void EmitCallSiteValue(uint64_t Value, unsigned Encoding) const;
+
   /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
   virtual unsigned getISAEncoding() { return 0; }
 
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 7e094a1738b71..3a2497bff11e8 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -51,6 +51,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
   unsigned PersonalityEncoding = 0;
   unsigned LSDAEncoding = 0;
   unsigned TTypeEncoding = 0;
+  unsigned CallSiteEncoding = 0;
 
   /// This section contains the static constructor pointer list.
   MCSection *StaticCtorSection = nullptr;
@@ -147,6 +148,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
   unsigned getPersonalityEncoding() const { return PersonalityEncoding; }
   unsigned getLSDAEncoding() const { return LSDAEncoding; }
   unsigned getTTypeEncoding() const { return TTypeEncoding; }
+  unsigned getCallSiteEncoding() const { return CallSiteEncoding; }
 
   const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
                                   MCStreamer &Streamer) const;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 7bc64d091a0d6..992e44d953062 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -183,6 +183,25 @@ void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
   EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize());
 }
 
+void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi,
+                                    const MCSymbol *Lo,
+                                    unsigned Encoding) const {
+  // The least significant 3 bits specify the width of the encoding
+  if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+    EmitLabelDifferenceAsULEB128(Hi, Lo);
+  else
+    EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::EmitCallSiteValue(uint64_t Value,
+                                   unsigned Encoding) const {
+  // The least significant 3 bits specify the width of the encoding
+  if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+    EmitULEB128(Value);
+  else
+    OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding));
+}
+
 //===----------------------------------------------------------------------===//
 // Dwarf Lowering Routines
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 2786f8d9f6a40..99e3687b36b8a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -378,7 +378,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
   bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
   bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
   unsigned CallSiteEncoding =
-      IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128;
+      IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) :
+               Asm->getObjFileLowering().getCallSiteEncoding();
   bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
 
   // Type infos.
@@ -523,24 +524,24 @@ MCSymbol *EHStreamer::emitExceptionTable() {
       // Offset of the call site relative to the start of the procedure.
       if (VerboseAsm)
         Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
-      Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym);
+      Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
       if (VerboseAsm)
         Asm->OutStreamer->AddComment(Twine("  Call between ") +
                                      BeginLabel->getName() + " and " +
                                      EndLabel->getName());
-      Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel);
+      Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
 
       // Offset of the landing pad relative to the start of the procedure.
       if (!S.LPad) {
         if (VerboseAsm)
           Asm->OutStreamer->AddComment("    has no landing pad");
-        Asm->EmitULEB128(0);
+        Asm->EmitCallSiteValue(0, CallSiteEncoding);
       } else {
         if (VerboseAsm)
           Asm->OutStreamer->AddComment(Twine("    jumps to ") +
                                        S.LPad->LandingPadLabel->getName());
-        Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel,
-                                          EHFuncBeginSym);
+        Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym,
+                                CallSiteEncoding);
       }
 
       // Offset of the first associated action record, relative to the start of
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index d8e6b3ef93a34..4c8f75b237aa8 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -218,6 +218,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
       PersonalityEncoding = dwarf::DW_EH_PE_absptr;
       TTypeEncoding = dwarf::DW_EH_PE_absptr;
     }
+    CallSiteEncoding = dwarf::DW_EH_PE_udata4;
     break;
   case Triple::riscv32:
   case Triple::riscv64:
@@ -226,6 +227,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
                           dwarf::DW_EH_PE_sdata4;
     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
                     dwarf::DW_EH_PE_sdata4;
+    CallSiteEncoding = dwarf::DW_EH_PE_udata4;
     break;
   case Triple::sparcv9:
     LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp
index ee32d01572461..17274e1c2c6eb 100644
--- a/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -47,6 +47,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
 
   // Reset various EH DWARF encodings.
   PersonalityEncoding = LSDAEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr;
+  CallSiteEncoding = dwarf::DW_EH_PE_uleb128;
 }
 
 TargetLoweringObjectFile::~TargetLoweringObjectFile() {
diff --git a/llvm/test/CodeGen/RISCV/dwarf-eh.ll b/llvm/test/CodeGen/RISCV/dwarf-eh.ll
index 67236d7da668e..ecca81783e56c 100644
--- a/llvm/test/CodeGen/RISCV/dwarf-eh.ll
+++ b/llvm/test/CodeGen/RISCV/dwarf-eh.ll
@@ -53,14 +53,14 @@ try.cont:
 ; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4
 ; TODO: call site encoding should be DW_EH_PE_udata4
 ; CHECK: .Lttbaseref0:
-; CHECK-NEXT: .byte	1                       # Call site Encoding = uleb128
+; CHECK-NEXT: .byte	3                       # Call site Encoding = udata4
 ; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0
 ; CHECK-NEXT: cst_begin0:
-; CHECK-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0   # >> Call Site 1 <<
-; CHECK-NEXT: .uleb128 .Ltmp1-.Ltmp0          #   Call between .Ltmp0 and .Ltmp1
-; CHECK-NEXT: .uleb128 .Ltmp2-.Lfunc_begin0   #     jumps to .Ltmp2
+; CHECK-NEXT: .word .Ltmp0-.Lfunc_begin0   # >> Call Site 1 <<
+; CHECK-NEXT: .word .Ltmp1-.Ltmp0          #   Call between .Ltmp0 and .Ltmp1
+; CHECK-NEXT: .word .Ltmp2-.Lfunc_begin0   #     jumps to .Ltmp2
 ; CHECK-NEXT: .byte	1                       #   On action: 1
-; CHECK-NEXT: .uleb128 .Ltmp1-.Lfunc_begin0   # >> Call Site 2 <<
-; CHECK-NEXT: .uleb128 .Lfunc_end0-.Ltmp1     #   Call between .Ltmp1 and .Lfunc_end0
-; CHECK-NEXT: .byte	0                       #     has no landing pad
+; CHECK-NEXT: .word .Ltmp1-.Lfunc_begin0   # >> Call Site 2 <<
+; CHECK-NEXT: .word .Lfunc_end0-.Ltmp1     #   Call between .Ltmp1 and .Lfunc_end0
+; CHECK-NEXT: .word	0                       #     has no landing pad
 ; CHECK-NEXT: .byte	0                       #   On action: cleanup

From c74fded05ff3d392d1190e39195f1e8d375660d1 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Wed, 17 Jul 2019 14:04:48 +0000
Subject: [PATCH 348/451] [RISCV][NFC] Remove outdated TODO from
 test/CodeGen/RISCV/dwarf-eh.ll

llvm-svn: 366330
---
 llvm/test/CodeGen/RISCV/dwarf-eh.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/RISCV/dwarf-eh.ll b/llvm/test/CodeGen/RISCV/dwarf-eh.ll
index ecca81783e56c..5b192ea516708 100644
--- a/llvm/test/CodeGen/RISCV/dwarf-eh.ll
+++ b/llvm/test/CodeGen/RISCV/dwarf-eh.ll
@@ -51,8 +51,8 @@ try.cont:
 ; CHECK-NEXT: .byte	255 # @LPStart Encoding = omit
 ; TTypeEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4
 ; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4
-; TODO: call site encoding should be DW_EH_PE_udata4
 ; CHECK: .Lttbaseref0:
+; CallSiteEncoding = dwarf::DW_EH_PE_udata4
 ; CHECK-NEXT: .byte	3                       # Call site Encoding = udata4
 ; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0
 ; CHECK-NEXT: cst_begin0:

From 80de11ed0216a232e224f4b01ab4eb818401f829 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Wed, 17 Jul 2019 14:32:25 +0000
Subject: [PATCH 349/451] [RISCV] Add RISCV to LLVM_ALL_TARGETS so it's built
 by default

This follows the RFC <http://lists.llvm.org/pipermail/llvm-dev/2019-July/133724.html>.

Follow-on commits will add appropriate release notes changes etc.

Pushing this now and in a minimal form so there is reasonable time before 9.0
branches to resolve any issues arising from e.g. the backend being exposed on
different sanitizer setups.

The current builder for RISC-V is on the staging build-bot
<http://lab.llvm.org:8014/builders/llvm-riscv-linux>, however with the RISCV
backend being built by default it won't provide any real additional coverage.
We will shortly set up a builder that runs the test-suite in qemu-user.

llvm-svn: 366331
---
 llvm/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index acebd3feb8bf8..b8eb19848bc58 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -285,6 +285,7 @@ set(LLVM_ALL_TARGETS
   MSP430
   NVPTX
   PowerPC
+  RISCV
   Sparc
   SystemZ
   WebAssembly

From ae512b83d5fc79fb96844ce7cfe4c6a4fee96c9a Mon Sep 17 00:00:00 2001
From: Gabor Marton <gabor.marton@ericsson.com>
Date: Wed, 17 Jul 2019 14:40:09 +0000
Subject: [PATCH 350/451] [ASTImporter] Fix structural eq of lambdas

Summary:
The structural equivalence check reported false eq between lambda classes
with different parameters in their call signature.
The solution is to check the methods for equality too in case of lambda
classes.

Reviewers: a_sidorin, a.sidorin

Subscribers: rnkovacs, dkrupp, Szelethus, gamesh411, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64075

llvm-svn: 366332
---
 clang/lib/AST/ASTStructuralEquivalence.cpp    | 20 +++++++
 clang/unittests/AST/ASTImporterTest.cpp       | 16 ++++++
 .../AST/StructuralEquivalenceTest.cpp         | 52 +++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp
index bb2e353eeef2b..912db3c130c51 100644
--- a/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -1085,6 +1085,19 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
   return true;
 }
 
+/// Determine structural equivalence of two lambda classes.
+///
+/// Both arguments must be lambda closure types; the comparison itself is
+/// delegated to the equivalence check of their call operators.
+static bool
+IsStructurallyEquivalentLambdas(StructuralEquivalenceContext &Context,
+                                CXXRecordDecl *D1, CXXRecordDecl *D2) {
+  assert(D1->isLambda() && D2->isLambda() &&
+         "Must be called on lambda classes");
+  return IsStructurallyEquivalent(Context, D1->getLambdaCallOperator(),
+                                  D2->getLambdaCallOperator());
+}
+
 /// Determine structural equivalence of two records.
 static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
                                      RecordDecl *D1, RecordDecl *D2) {
@@ -1166,6 +1179,13 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
         D1CXX->getASTContext().getExternalSource()->CompleteType(D1CXX);
       }
 
+      if (D1CXX->isLambda() != D2CXX->isLambda())
+        return false;
+      if (D1CXX->isLambda()) {
+        if (!IsStructurallyEquivalentLambdas(Context, D1CXX, D2CXX))
+          return false;
+      }
+
       if (D1CXX->getNumBases() != D2CXX->getNumBases()) {
         if (Context.Complain) {
           Context.Diag2(D2->getLocation(),
diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp
index 8b2f7c5b240e4..6ea350cc72657 100644
--- a/clang/unittests/AST/ASTImporterTest.cpp
+++ b/clang/unittests/AST/ASTImporterTest.cpp
@@ -5122,6 +5122,22 @@ TEST_P(ASTImporterOptionSpecificTestBase, LambdaInFunctionParam) {
   EXPECT_EQ(ToLSize, FromLSize);
 }
 
+TEST_P(ASTImporterOptionSpecificTestBase, LambdaInGlobalScope) {
+  Decl *FromTU = getTuDecl(
+      R"(
+      auto l1 = [](unsigned lp) { return 1; };
+      auto l2 = [](int lp) { return 2; };
+      int f(int p) {
+        return l1(p) + l2(p);
+      }
+      )",
+      Lang_CXX11, "input0.cc");
+  FunctionDecl *FromF = FirstDeclMatcher<FunctionDecl>().match(
+      FromTU, functionDecl(hasName("f")));
+  FunctionDecl *ToF = Import(FromF, Lang_CXX11);
+  EXPECT_TRUE(ToF);
+}
+
 struct LLDBLookupTest : ASTImporterOptionSpecificTestBase {
   LLDBLookupTest() {
     Creator = [](ASTContext &ToContext, FileManager &ToFileManager,
diff --git a/clang/unittests/AST/StructuralEquivalenceTest.cpp b/clang/unittests/AST/StructuralEquivalenceTest.cpp
index 63757987e113c..cdb55d90b9dfb 100644
--- a/clang/unittests/AST/StructuralEquivalenceTest.cpp
+++ b/clang/unittests/AST/StructuralEquivalenceTest.cpp
@@ -797,6 +797,58 @@ TEST_F(StructuralEquivalenceRecordTest, RecordsWithDifferentBody) {
   EXPECT_FALSE(testStructuralMatch(t));
 }
 
+struct StructuralEquivalenceLambdaTest : StructuralEquivalenceTest {};
+
+TEST_F(StructuralEquivalenceLambdaTest, LambdaClassesWithDifferentMethods) {
+  // Get the LambdaExprs; unfortunately, we can't directly match the underlying
+  // implicit CXXRecordDecl of the lambda classes.
+  auto t = makeDecls<LambdaExpr>(
+      "void f() { auto L0 = [](int){}; }",
+      "void f() { auto L1 = [](){}; }",
+      Lang_CXX11,
+      lambdaExpr(),
+      lambdaExpr());
+  CXXRecordDecl *L0 = get<0>(t)->getLambdaClass();
+  CXXRecordDecl *L1 = get<1>(t)->getLambdaClass();
+  EXPECT_FALSE(testStructuralMatch(L0, L1));
+}
+
+TEST_F(StructuralEquivalenceLambdaTest, LambdaClassesWithEqMethods) {
+  auto t = makeDecls<LambdaExpr>(
+      "void f() { auto L0 = [](int){}; }",
+      "void f() { auto L1 = [](int){}; }",
+      Lang_CXX11,
+      lambdaExpr(),
+      lambdaExpr());
+  CXXRecordDecl *L0 = get<0>(t)->getLambdaClass();
+  CXXRecordDecl *L1 = get<1>(t)->getLambdaClass();
+  EXPECT_TRUE(testStructuralMatch(L0, L1));
+}
+
+TEST_F(StructuralEquivalenceLambdaTest, LambdaClassesWithDifferentFields) {
+  auto t = makeDecls<LambdaExpr>(
+      "void f() { char* X; auto L0 = [X](){}; }",
+      "void f() { float X; auto L1 = [X](){}; }",
+      Lang_CXX11,
+      lambdaExpr(),
+      lambdaExpr());
+  CXXRecordDecl *L0 = get<0>(t)->getLambdaClass();
+  CXXRecordDecl *L1 = get<1>(t)->getLambdaClass();
+  EXPECT_FALSE(testStructuralMatch(L0, L1));
+}
+
+TEST_F(StructuralEquivalenceLambdaTest, LambdaClassesWithEqFields) {
+  auto t = makeDecls<LambdaExpr>(
+      "void f() { float X; auto L0 = [X](){}; }",
+      "void f() { float X; auto L1 = [X](){}; }",
+      Lang_CXX11,
+      lambdaExpr(),
+      lambdaExpr());
+  CXXRecordDecl *L0 = get<0>(t)->getLambdaClass();
+  CXXRecordDecl *L1 = get<1>(t)->getLambdaClass();
+  EXPECT_TRUE(testStructuralMatch(L0, L1));
+}
+
 TEST_F(StructuralEquivalenceTest, CompareSameDeclWithMultiple) {
   auto t = makeNamedDecls(
       "struct A{ }; struct B{ }; void foo(A a, A b);",

From 87886299b468ccaa0f07d6ee0b237e25c4c35b96 Mon Sep 17 00:00:00 2001
From: Chris Jackson <snortotter@gmail.com>
Date: Wed, 17 Jul 2019 14:54:02 +0000
Subject: [PATCH 351/451] [lld] Add Visual Studio compatible diagnostics

Summary:
Add a --vs-diagnostics flag that alters the format of diagnostic output
to enable source hyperlinks in Visual Studio.

Differential Revision: https://reviews.llvm.org/D58484

Reviewed by: ruiu

llvm-svn: 366333
---
 lld/Common/ErrorHandler.cpp                   | 45 +++++++++++--
 lld/ELF/Driver.cpp                            |  2 +
 lld/ELF/Options.td                            |  3 +
 lld/include/lld/Common/ErrorHandler.h         |  3 +-
 .../ELF/Inputs/vs-diagnostics-duplicate2.s    | 31 +++++++++
 .../ELF/Inputs/vs-diagnostics-duplicate3.s    |  6 ++
 lld/test/ELF/vs-diagnostics-duplicate.s       | 63 +++++++++++++++++++
 .../ELF/vs-diagnostics-dynamic-relocation.s   | 35 +++++++++++
 .../ELF/vs-diagnostics-undefined-symbol-1.s   | 15 +++++
 .../ELF/vs-diagnostics-undefined-symbol-2.s   | 18 ++++++
 .../ELF/vs-diagnostics-undefined-symbol-3.s   | 40 ++++++++++++
 lld/test/ELF/vs-diagnostics-versionscript.s   |  7 +++
 12 files changed, 262 insertions(+), 6 deletions(-)
 create mode 100644 lld/test/ELF/Inputs/vs-diagnostics-duplicate2.s
 create mode 100644 lld/test/ELF/Inputs/vs-diagnostics-duplicate3.s
 create mode 100644 lld/test/ELF/vs-diagnostics-duplicate.s
 create mode 100644 lld/test/ELF/vs-diagnostics-dynamic-relocation.s
 create mode 100644 lld/test/ELF/vs-diagnostics-undefined-symbol-1.s
 create mode 100644 lld/test/ELF/vs-diagnostics-undefined-symbol-2.s
 create mode 100644 lld/test/ELF/vs-diagnostics-undefined-symbol-3.s
 create mode 100644 lld/test/ELF/vs-diagnostics-versionscript.s

diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp
index f5d3eb44848c5..c87c0609b2601 100644
--- a/lld/Common/ErrorHandler.cpp
+++ b/lld/Common/ErrorHandler.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
 #include <mutex>
+#include <regex>
 
 #if !defined(_MSC_VER) && !defined(__MINGW32__)
 #include <unistd.h>
@@ -84,8 +85,42 @@ void lld::checkError(Error e) {
                   [&](ErrorInfoBase &eib) { error(eib.message()); });
 }
 
-void ErrorHandler::print(StringRef s, raw_ostream::Colors c) {
-  *errorOS << logName << ": ";
+// Extracts a Visual Studio-style "file(line)" (or bare "file") location from
+// a diagnostic message; returns defaultMsg when no known pattern matches.
+// Every pattern captures the file in group 1 and, optionally, the line in
+// group 2, so prefix alternations must use non-capturing (?:...) groups.
+static std::string getLocation(std::string msg, std::string defaultMsg) {
+  static std::vector<std::regex> Regexes{
+      std::regex(R"(^undefined symbol:.*\n>>> referenced by (\S+):(\d+)\n.*)"),
+      std::regex(R"(^undefined symbol:.*\n>>> referenced by (.*):)"),
+      std::regex(
+          R"(^duplicate symbol: .*\n>>> defined in (\S+)\n>>> defined in.*)"),
+      std::regex(R"(^duplicate symbol: .*\n>>> defined at (\S+):(\d+).*)"),
+      std::regex(R"(.*\n>>> defined in .*\n>>> referenced by (\S+):(\d+))"),
+      std::regex(
+          R"(^undefined (?:internal|hidden|protected) symbol: .*\n>>> referenced by (\S+):(\d+)\n.*)"),
+      std::regex(R"((\S+):(\d+): unclosed quote)"),
+  };
+
+  std::smatch Match;
+  for (std::regex &Re : Regexes)
+    if (std::regex_search(msg, Match, Re))
+      return Match.size() > 2 ? Match.str(1) + "(" + Match.str(2) + ")"
+                              : Match.str(1);
+  return defaultMsg;
+}
+
+void ErrorHandler::printHeader(StringRef s, raw_ostream::Colors c,
+                               const Twine &msg) {
+
+  if (vsDiagnostics) {
+    // A Visual Studio-style error message starts with an error location.
+    // If a location cannot be extracted then we default to logName.
+    *errorOS << getLocation(msg.str(), logName) << ": ";
+  } else {
+    *errorOS << logName << ": ";
+  }
+
   if (colorDiagnostics) {
     errorOS->changeColor(c, true);
     *errorOS << s;
@@ -116,7 +151,7 @@ void ErrorHandler::warn(const Twine &msg) {
 
   std::lock_guard<std::mutex> lock(mu);
   newline(errorOS, msg);
-  print("warning: ", raw_ostream::MAGENTA);
+  printHeader("warning: ", raw_ostream::MAGENTA, msg);
   *errorOS << msg << "\n";
 }
 
@@ -125,10 +160,10 @@ void ErrorHandler::error(const Twine &msg) {
   newline(errorOS, msg);
 
   if (errorLimit == 0 || errorCount < errorLimit) {
-    print("error: ", raw_ostream::RED);
+    printHeader("error: ", raw_ostream::RED, msg);
     *errorOS << msg << "\n";
   } else if (errorCount == errorLimit) {
-    print("error: ", raw_ostream::RED);
+    printHeader("error: ", raw_ostream::RED, msg);
     *errorOS << errorLimitExceededMsg << "\n";
     if (exitEarly)
       exitLld(1);
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 98551d2cb34dc..fbfc71d22b7e5 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -786,6 +786,8 @@ static void readConfigs(opt::InputArgList &args) {
   errorHandler().verbose = args.hasArg(OPT_verbose);
   errorHandler().fatalWarnings =
       args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
+  errorHandler().vsDiagnostics =
+      args.hasArg(OPT_visual_studio_diagnostics_format, false);
   threadsEnabled = args.hasFlag(OPT_threads, OPT_no_threads, true);
 
   config->allowMultipleDefinition =
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index d45d9aaad9af9..3ebb46f2e1b2d 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -416,6 +416,9 @@ defm wrap: Eq<"wrap", "Use wrapper functions for symbol">,
 def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"<option>">,
   HelpText<"Linker option extensions">;
 
+def visual_studio_diagnostics_format : F<"vs-diagnostics">,
+HelpText<"Format diagnostics for Visual Studio compatibility">;
+
 // Aliases
 def: Separate<["-"], "f">, Alias<auxiliary>, HelpText<"Alias for --auxiliary">;
 def: F<"call_shared">, Alias<Bdynamic>, HelpText<"Alias for --Bdynamic">;
diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h
index 3467fdc74da7e..7126a7bf410ae 100644
--- a/lld/include/lld/Common/ErrorHandler.h
+++ b/lld/include/lld/Common/ErrorHandler.h
@@ -91,6 +91,7 @@ class ErrorHandler {
   bool exitEarly = true;
   bool fatalWarnings = false;
   bool verbose = false;
+  bool vsDiagnostics = false;
 
   void error(const Twine &msg);
   LLVM_ATTRIBUTE_NORETURN void fatal(const Twine &msg);
@@ -101,7 +102,7 @@ class ErrorHandler {
   std::unique_ptr<llvm::FileOutputBuffer> outputBuffer;
 
 private:
-  void print(StringRef s, raw_ostream::Colors c);
+  void printHeader(StringRef s, raw_ostream::Colors c, const Twine &msg);
 };
 
 /// Returns the default error handler.
diff --git a/lld/test/ELF/Inputs/vs-diagnostics-duplicate2.s b/lld/test/ELF/Inputs/vs-diagnostics-duplicate2.s
new file mode 100644
index 0000000000000..4edfae09258f1
--- /dev/null
+++ b/lld/test/ELF/Inputs/vs-diagnostics-duplicate2.s
@@ -0,0 +1,31 @@
+.global foo, bar
+
+.text
+foo:
+  nop
+
+.file 1 "duplicate2.s"
+.loc 1 20
+bar:
+  nop
+
+.section .debug_abbrev,"",@progbits
+  .byte  1                      # Abbreviation Code
+  .byte 17                      # DW_TAG_compile_unit
+  .byte  0                      # DW_CHILDREN_no
+  .byte 16                      # DW_AT_stmt_list
+  .byte 23                      # DW_FORM_sec_offset
+  .byte  0                      # EOM(1)
+  .byte  0                      # EOM(2)
+  .byte  0                      # EOM(3)
+
+.section .debug_info,"",@progbits
+  .long .Lend0 - .Lbegin0       # Length of Unit
+.Lbegin0:
+  .short 4                      # DWARF version number
+  .long  .debug_abbrev          # Offset Into Abbrev. Section
+  .byte  8                      # Address Size (in bytes)
+  .byte  1                      # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit
+  .long  .debug_line            # DW_AT_stmt_list
+.Lend0:
+  .section .debug_line,"",@progbits
diff --git a/lld/test/ELF/Inputs/vs-diagnostics-duplicate3.s b/lld/test/ELF/Inputs/vs-diagnostics-duplicate3.s
new file mode 100644
index 0000000000000..81829c82bf551
--- /dev/null
+++ b/lld/test/ELF/Inputs/vs-diagnostics-duplicate3.s
@@ -0,0 +1,6 @@
+.file "duplicate3.s"
+
+.global baz
+.text
+baz:
+  nop
diff --git a/lld/test/ELF/vs-diagnostics-duplicate.s b/lld/test/ELF/vs-diagnostics-duplicate.s
new file mode 100644
index 0000000000000..efd0cbe5fb67a
--- /dev/null
+++ b/lld/test/ELF/vs-diagnostics-duplicate.s
@@ -0,0 +1,63 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/vs-diagnostics-duplicate2.s -o %t2.o
+// RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/vs-diagnostics-duplicate3.s -o %t3.o
+// RUN: not ld.lld --vs-diagnostics %t1.o %t2.o %t3.o -o %tout 2>&1 | FileCheck %s
+
+// Case 1. Both symbols have full source location.
+// CHECK:      duplicate.s(15): error: duplicate symbol: bar
+// CHECK-NEXT: >>> defined at duplicate.s:15
+// CHECK-NEXT: >>>{{.*}}1.o:(.text+0x{{.+}})
+// CHECK: >>> defined at duplicate2.s:20
+// CHECK: >>>{{.*}}2.o:(.text+0x{{.+}})
+
+// Case 2. The source locations are unknown for both symbols.
+// CHECK:      {{.*}}ld.lld{{.*}}: error: duplicate symbol: foo
+// CHECK-NEXT: >>> defined at {{.*}}1.o:(.text+0x{{.+}})
+// CHECK-NEXT: >>> defined at {{.*}}2.o:(.text+0x{{.+}})
+
+// Case 3. For the second definition of `baz` we know only the source file found in a STT_FILE symbol.
+// CHECK:      duplicate.s(30): error: duplicate symbol: baz
+// CHECK-NEXT: >>> defined at duplicate.s:30
+// CHECK-NEXT: >>> {{.*}}1.o:(.text+0x{{.+}})
+// CHECK-NEXT: >>> defined at duplicate3.s
+// CHECK-NEXT: >>>            {{.*}}3.o:(.text+0x{{.+}})
+
+.global _start, foo, bar, baz
+.text
+_start:
+  nop
+
+foo:
+  nop
+
+.file 1 "duplicate.s"
+.loc 1 15
+
+bar:
+  nop
+
+.loc 1 30
+baz:
+  nop
+
+.section .debug_abbrev,"",@progbits
+  .byte  1                      # Abbreviation Code
+  .byte 17                      # DW_TAG_compile_unit
+  .byte  0                      # DW_CHILDREN_no
+  .byte 16                      # DW_AT_stmt_list
+  .byte 23                      # DW_FORM_sec_offset
+  .byte  0                      # EOM(1)
+  .byte  0                      # EOM(2)
+  .byte  0                      # EOM(3)
+
+.section .debug_info,"",@progbits
+  .long .Lend0 - .Lbegin0       # Length of Unit
+.Lbegin0:
+  .short 4                      # DWARF version number
+  .long  .debug_abbrev          # Offset Into Abbrev. Section
+  .byte  8                      # Address Size (in bytes)
+  .byte  1                      # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit
+  .long  .debug_line            # DW_AT_stmt_list
+.Lend0:
+  .section .debug_line,"",@progbits
diff --git a/lld/test/ELF/vs-diagnostics-dynamic-relocation.s b/lld/test/ELF/vs-diagnostics-dynamic-relocation.s
new file mode 100644
index 0000000000000..6575133eca11a
--- /dev/null
+++ b/lld/test/ELF/vs-diagnostics-dynamic-relocation.s
@@ -0,0 +1,35 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+// RUN: not ld.lld -shared --vs-diagnostics %t.o -o /dev/null 2>&1 | FileCheck %s
+
+// CHECK: dyn.s(15): error: can't create dynamic relocation R_X86_64_64 against local symbol in readonly segment; recompile object files with -fPIC or pass '-Wl,-z,notext' to allow text relocations in the output
+// CHECK-NEXT: >>> defined in {{.*}}.o
+// CHECK-NEXT: >>> referenced by dyn.s:15
+// CHECK-NEXT: >>>{{.*}}.o:(.text+0x{{.+}})
+
+.file 1 "dyn.s"
+.loc 1 15
+
+foo:
+.quad foo
+
+.section .debug_abbrev,"",@progbits
+  .byte  1                      # Abbreviation Code
+  .byte 17                      # DW_TAG_compile_unit
+  .byte  0                      # DW_CHILDREN_no
+  .byte 16                      # DW_AT_stmt_list
+  .byte 23                      # DW_FORM_sec_offset
+  .byte  0                      # EOM(1)
+  .byte  0                      # EOM(2)
+  .byte  0                      # EOM(3)
+
+.section .debug_info,"",@progbits
+  .long .Lend0 - .Lbegin0       # Length of Unit
+.Lbegin0:
+  .short 4                      # DWARF version number
+  .long  .debug_abbrev          # Offset Into Abbrev. Section
+  .byte  8                      # Address Size (in bytes)
+  .byte  1                      # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit
+  .long  .debug_line            # DW_AT_stmt_list
+.Lend0:
+  .section .debug_line,"",@progbits
diff --git a/lld/test/ELF/vs-diagnostics-undefined-symbol-1.s b/lld/test/ELF/vs-diagnostics-undefined-symbol-1.s
new file mode 100644
index 0000000000000..908549327ecfd
--- /dev/null
+++ b/lld/test/ELF/vs-diagnostics-undefined-symbol-1.s
@@ -0,0 +1,15 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t1.o
+// RUN: not ld.lld --vs-diagnostics %t1.o -o %tout 2>&1 \
+// RUN:   | FileCheck -check-prefix=ERR -check-prefix=CHECK -DFILE=%t1.o %s
+// RUN: ld.lld --vs-diagnostics --warn-unresolved-symbols %t1.o -o %tout 2>&1 \
+// RUN:   | FileCheck -check-prefix=WARN -check-prefix=CHECK -DFILE=%t1.o %s
+
+// ERR:        [[FILE]]: error: undefined symbol: foo
+// WARN:       [[FILE]]: warning: undefined symbol: foo
+// CHECK-NEXT: >>> referenced by {{.*}}1.o:(.text+0x{{.+}})
+
+.global _start, foo
+.text
+_start:
+  jmp foo
\ No newline at end of file
diff --git a/lld/test/ELF/vs-diagnostics-undefined-symbol-2.s b/lld/test/ELF/vs-diagnostics-undefined-symbol-2.s
new file mode 100644
index 0000000000000..3c8c80ef6a252
--- /dev/null
+++ b/lld/test/ELF/vs-diagnostics-undefined-symbol-2.s
@@ -0,0 +1,18 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t1.o
+// RUN: not ld.lld --vs-diagnostics %t1.o -o %tout 2>&1 \
+// RUN:   | FileCheck -check-prefix=ERR -check-prefix=CHECK %s
+// RUN: ld.lld --vs-diagnostics --warn-unresolved-symbols %t1.o -o %tout 2>&1 \
+// RUN:   | FileCheck -check-prefix=WARN -check-prefix=CHECK %s
+
+// ERR:        {{.*}}ld.lld{{.*}}: error: undefined symbol: foo
+// WARN:       {{.*}}ld.lld{{.*}}: warning: undefined symbol: foo
+// CHECK-NEXT: >>> referenced by undef2.s
+// CHECK-NEXT: >>>               {{.*}}1.o:(.text+0x{{.+}})
+
+.file "undef2.s"
+
+.global _start, foo
+.text
+_start:
+  jmp foo
diff --git a/lld/test/ELF/vs-diagnostics-undefined-symbol-3.s b/lld/test/ELF/vs-diagnostics-undefined-symbol-3.s
new file mode 100644
index 0000000000000..3ff9885b7401d
--- /dev/null
+++ b/lld/test/ELF/vs-diagnostics-undefined-symbol-3.s
@@ -0,0 +1,40 @@
+// REQUIRES: x86
+// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t1.o
+// RUN: not ld.lld --vs-diagnostics %t1.o -o %tout 2>&1 \
+// RUN:   | FileCheck -check-prefix=ERR -check-prefix=CHECK %s
+// RUN: ld.lld --vs-diagnostics --warn-unresolved-symbols %t1.o -o %tout 2>&1 \
+// RUN:   | FileCheck -check-prefix=WARN -check-prefix=CHECK %s
+
+// ERR:        undef3.s(15): error: undefined symbol: foo
+// WARN:       undef3.s(15): warning: undefined symbol: foo
+// CHECK:      >>> referenced by undef3.s:15
+// CHECK-NEXT: >>> {{.*}}1.o:(.text+0x{{.+}})
+
+.file 1 "undef3.s"
+
+.global _start, foo
+.text
+_start:
+.loc 1 15
+  jmp foo
+
+.section .debug_abbrev,"",@progbits
+  .byte  1                      # Abbreviation Code
+  .byte 17                      # DW_TAG_compile_unit
+  .byte  0                      # DW_CHILDREN_no
+  .byte 16                      # DW_AT_stmt_list
+  .byte 23                      # DW_FORM_sec_offset
+  .byte  0                      # EOM(1)
+  .byte  0                      # EOM(2)
+  .byte  0                      # EOM(3)
+
+.section .debug_info,"",@progbits
+  .long .Lend0 - .Lbegin0       # Length of Unit
+.Lbegin0:
+  .short 4                      # DWARF version number
+  .long  .debug_abbrev          # Offset Into Abbrev. Section
+  .byte  8                      # Address Size (in bytes)
+  .byte  1                      # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit
+  .long  .debug_line            # DW_AT_stmt_list
+.Lend0:
+  .section .debug_line,"",@progbits
diff --git a/lld/test/ELF/vs-diagnostics-versionscript.s b/lld/test/ELF/vs-diagnostics-versionscript.s
new file mode 100644
index 0000000000000..2d0be7fc01b90
--- /dev/null
+++ b/lld/test/ELF/vs-diagnostics-versionscript.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
+# RUN: rm -f %/terr1.script
+# RUN: echo  "\"" > %/terr1.script
+# RUN: not ld.lld --vs-diagnostics  --version-script %/terr1.script -shared %/t.o -o %/t.so 2>&1 | \
+# RUN: FileCheck %s -DSCRIPT="%/terr1.script"
+
+# CHECK: [[SCRIPT]](1): error: [[SCRIPT]]:1: unclosed quote

From 629273ec09825b870ce53e6419296d73519b1e24 Mon Sep 17 00:00:00 2001
From: Owen Reynolds <gbreynoo@gmail.com>
Date: Wed, 17 Jul 2019 15:10:02 +0000
Subject: [PATCH 352/451] [llvm-ar][test] Add tests failing on Darwin

These tests failed on Darwin but passed on other machines because the default archive format differs
on a Darwin machine, and because of what look to be bugs in the output of that format.
I cannot investigate these issues further, so the tests are marked as expected failures on Darwin.

Differential Revision: https://reviews.llvm.org/D64802

llvm-svn: 366334
---
 llvm/test/tools/llvm-ar/extract.test  | 22 +++++++
 llvm/test/tools/llvm-ar/mri-utf8.test | 19 ++++++
 llvm/test/tools/llvm-ar/print.test    | 84 +++++++++++++++++++++++++++
 3 files changed, 125 insertions(+)
 create mode 100644 llvm/test/tools/llvm-ar/extract.test
 create mode 100644 llvm/test/tools/llvm-ar/mri-utf8.test
 create mode 100644 llvm/test/tools/llvm-ar/print.test

diff --git a/llvm/test/tools/llvm-ar/extract.test b/llvm/test/tools/llvm-ar/extract.test
new file mode 100644
index 0000000000000..a948274328ef6
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/extract.test
@@ -0,0 +1,22 @@
+## Test extract operation.
+# XFAIL: darwin
+
+RUN: rm -rf %t && mkdir -p %t/extracted/
+
+# Extracting from an empty archive should not warn or error:
+RUN: llvm-ar cr %t/empty.a
+RUN: llvm-ar x %t/empty.a 2>&1 | count 0
+
+RUN: echo filea > %t/a.txt
+RUN: echo fileb > %t/b.txt
+RUN: llvm-ar rc %t/archive.a %t/a.txt %t/b.txt
+
+# Single member:
+RUN: cd %t/extracted && llvm-ar x %t/archive.a a.txt
+RUN: diff %t/a.txt %t/extracted/a.txt 
+
+# All members:
+RUN: rm %t/extracted/a.txt
+RUN: cd %t/extracted && llvm-ar x %t/archive.a
+RUN: diff %t/a.txt %t/extracted/a.txt 
+RUN: diff %t/b.txt %t/extracted/b.txt 
diff --git a/llvm/test/tools/llvm-ar/mri-utf8.test b/llvm/test/tools/llvm-ar/mri-utf8.test
new file mode 100644
index 0000000000000..57357abf88e4a
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/mri-utf8.test
@@ -0,0 +1,19 @@
+# Test non-ascii archive members
+# XFAIL: darwin
+
+RUN: rm -rf %t && mkdir -p %t/extracted
+
+RUN: echo "contents" > %t/£.txt
+
+RUN: echo "CREATE %t/mri.ar" > %t/script.mri
+RUN: echo "ADDMOD %t/£.txt" >> %t/script.mri
+RUN: echo "SAVE" >> %t/script.mri
+
+RUN: llvm-ar -M < %t/script.mri
+RUN: cd %t/extracted && llvm-ar x %t/mri.ar
+
+# This works around problems launching processes that
+# include arguments with non-ascii characters.
+# Python on Linux defaults to ASCII encoding unless the
+# environment specifies otherwise, so it is explicitly set.
+RUN: env LANG=en_US.UTF-8 %python -c "assert open(u'\U000000A3.txt', 'rb').read() == b'contents\n'"
diff --git a/llvm/test/tools/llvm-ar/print.test b/llvm/test/tools/llvm-ar/print.test
new file mode 100644
index 0000000000000..10ef78c152dde
--- /dev/null
+++ b/llvm/test/tools/llvm-ar/print.test
@@ -0,0 +1,84 @@
+## Test Print output
+# XFAIL: darwin
+
+RUN: rm -rf %t && mkdir -p %t
+RUN: echo file1 > %t/1.txt
+RUN: echo file2 > %t/2.txt
+RUN: echo file3 > %t/3.txt
+
+RUN: llvm-ar -rc %t/archive.a %t/1.txt %t/2.txt %t/3.txt
+
+# Print without member:
+RUN: llvm-ar p %t/archive.a \
+RUN:   | FileCheck %s --check-prefix=WITHOUT --match-full-lines --implicit-check-not {{.}}
+
+WITHOUT:      file1
+WITHOUT-NEXT: file2
+WITHOUT-NEXT: file3
+
+RUN: llvm-ar pv %t/archive.a \
+RUN:   | FileCheck %s --check-prefix=WITHOUT-VERBOSE --match-full-lines --implicit-check-not {{.}}
+
+WITHOUT-VERBOSE:      Printing 1.txt
+WITHOUT-VERBOSE-NEXT: file1
+WITHOUT-VERBOSE-NEXT: Printing 2.txt
+WITHOUT-VERBOSE-NEXT: file2
+WITHOUT-VERBOSE-NEXT: Printing 3.txt
+WITHOUT-VERBOSE-NEXT: file3
+
+# Print single member:
+RUN: llvm-ar p %t/archive.a %t/2.txt  \
+RUN:   | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}}
+
+SINGLE: file2
+
+RUN: llvm-ar pv %t/archive.a %t/2.txt  \
+RUN:   | FileCheck %s --check-prefix=SINGLE-VERBOSE --match-full-lines --implicit-check-not {{.}}
+
+SINGLE-VERBOSE:      Printing 2.txt
+SINGLE-VERBOSE-NEXT: file2
+
+# Print multiple members:
+RUN: llvm-ar p %t/archive.a %t/2.txt %t/1.txt \
+RUN:   | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}}
+
+MULTIPLE:      file1
+MULTIPLE-NEXT: file2
+
+RUN: llvm-ar pv %t/archive.a %t/2.txt %t/1.txt \
+RUN:   | FileCheck %s --check-prefix=MULTIPLE-VERBOSE --match-full-lines --implicit-check-not {{.}}
+
+MULTIPLE-VERBOSE:      Printing 1.txt
+MULTIPLE-VERBOSE-NEXT: file1
+MULTIPLE-VERBOSE-NEXT: Printing 2.txt
+MULTIPLE-VERBOSE-NEXT: file2
+
+# Print same member:
+RUN: not llvm-ar p %t/archive.a %t/2.txt %t/2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.txt
+
+SAME-DAG: file2
+SAME-DAG: error: '[[FILE]]' was not found
+
+# Print same member when containing multiple members with shared name:
+llvm-ar -q %t/archive.a %t/2.txt
+RUN: not llvm-ar p %t/archive.a %t/2.txt %t/2.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=SAME -DFILE=%t/2.txt
+
+# No archive:
+RUN: not llvm-ar p 2>&1 \
+RUN:   | FileCheck %s --check-prefix=NO-ARCHIVE
+
+NO-ARCHIVE: error: An archive name must be specified.
+
+# Archive does not exist:
+RUN: not llvm-ar p %t/missing.a 2>&1 \
+RUN:   | FileCheck %s --check-prefix=MISSING-ARCHIVE -DARCHIVE=%t/missing.a
+
+MISSING-ARCHIVE: error: error loading '[[ARCHIVE]]': {{[nN]}}o such file or directory.
+
+# Member does not exist:
+RUN: not llvm-ar p %t/archive.a %t-missing.txt 2>&1 \
+RUN:   | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t-missing.txt
+
+MISSING-FILE: error: '[[FILE]]' was not found

From 11d3710c1cd25adf23a4690076e12d6ece267731 Mon Sep 17 00:00:00 2001
From: Hideto Ueno <uenoku.tokotoko@gmail.com>
Date: Wed, 17 Jul 2019 15:15:43 +0000
Subject: [PATCH 353/451] [Attributor] Deduce "willreturn" function attribute

Summary:
Deduce the "willreturn" attribute for functions.

For now, intrinsics are not willreturn. More annotation will be done in another patch.

Reviewers: jdoerfert

Subscribers: jvesely, nhaehnle, nicholas, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63046

llvm-svn: 366335
---
 llvm/include/llvm/IR/Intrinsics.td            |   2 +
 llvm/include/llvm/Transforms/IPO/Attributor.h |  44 +++++++
 llvm/lib/Transforms/IPO/Attributor.cpp        | 120 ++++++++++++++++++
 .../Transforms/FunctionAttrs/arg_returned.ll  |   2 +-
 .../Transforms/FunctionAttrs/willreturn.ll    |  95 +++++++++++++-
 llvm/utils/TableGen/CodeGenIntrinsics.h       |   3 +
 llvm/utils/TableGen/CodeGenTarget.cpp         |   3 +
 llvm/utils/TableGen/IntrinsicEmitter.cpp      |  14 +-
 8 files changed, 274 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 8276d7535c3b2..d660f82784370 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -94,6 +94,8 @@ class ReadNone<int argNo> : IntrinsicProperty {
 
 def IntrNoReturn : IntrinsicProperty;
 
+def IntrWillReturn : IntrinsicProperty;
+
 // IntrCold - Calls to this intrinsic are cold.
 // Parallels the cold attribute on LLVM IR functions.
 def IntrCold : IntrinsicProperty;
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 3f6a2b6a24e9d..5dbe21ac5e4e4 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -740,6 +740,50 @@ struct AANonNull : public AbstractAttribute {
   /// The identifier used by the Attributor for this class of attributes.
   static constexpr Attribute::AttrKind ID = Attribute::NonNull;
 };
+
+/// An abstract attribute for norecurse.
+struct AANoRecurse : public AbstractAttribute {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AANoRecurse(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(V, InfoCache) {}
+
+  /// See AbstractAttribute::getAttrKind()
+  virtual Attribute::AttrKind getAttrKind() const override {
+    return Attribute::NoRecurse;
+  }
+
+  /// Return true if "norecurse" is known.
+  virtual bool isKnownNoRecurse() const = 0;
+
+  /// Return true if "norecurse" is assumed.
+  virtual bool isAssumedNoRecurse() const = 0;
+
+  /// The identifier used by the Attributor for this class of attributes.
+  static constexpr Attribute::AttrKind ID = Attribute::NoRecurse;
+};
+
+/// An abstract attribute for willreturn.
+struct AAWillReturn : public AbstractAttribute {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AAWillReturn(Value &V, InformationCache &InfoCache)
+      : AbstractAttribute(V, InfoCache) {}
+
+  /// See AbstractAttribute::getAttrKind()
+  virtual Attribute::AttrKind getAttrKind() const override {
+    return Attribute::WillReturn;
+  }
+
+  /// Return true if "willreturn" is known.
+  virtual bool isKnownWillReturn() const = 0;
+
+  /// Return true if "willreturn" is assumed.
+  virtual bool isAssumedWillReturn() const = 0;
+
+  /// The identifier used by the Attributor for this class of attributes.
+  static constexpr Attribute::AttrKind ID = Attribute::WillReturn;
+};
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 5d18e40b0b92c..2f31b3924a9a2 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -15,6 +15,7 @@
 
 #include "llvm/Transforms/IPO/Attributor.h"
 
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -23,6 +24,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/CommandLine.h"
@@ -56,6 +58,7 @@ STATISTIC(NumFnReturnedNonNull,
           "Number of function return values marked nonnull");
 STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull");
 STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull");
+STATISTIC(NumFnWillReturn, "Number of functions marked willreturn");
 
 // TODO: Determine a good default value.
 //
@@ -128,6 +131,9 @@ static void bookkeeping(AbstractAttribute::ManifestPosition MP,
       break;
     }
     break;
+  case Attribute::WillReturn:
+    NumFnWillReturn++;
+    break;
   default:
     return;
   }
@@ -1194,6 +1200,117 @@ ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) {
   return ChangeStatus::UNCHANGED;
 }
 
+/// ------------------------ Will-Return Attributes ----------------------------
+
+struct AAWillReturnImpl : public AAWillReturn, BooleanState {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AAWillReturnImpl(Function &F, InformationCache &InfoCache)
+      : AAWillReturn(F, InfoCache) {}
+
+  /// See AAWillReturn::isKnownWillReturn().
+  bool isKnownWillReturn() const override { return getKnown(); }
+
+  /// See AAWillReturn::isAssumedWillReturn().
+  bool isAssumedWillReturn() const override { return getAssumed(); }
+
+  /// See AbstractAttribute::getState(...).
+  AbstractState &getState() override { return *this; }
+
+  /// See AbstractAttribute::getState(...).
+  const AbstractState &getState() const override { return *this; }
+
+  /// See AbstractAttribute::getAsStr()
+  const std::string getAsStr() const override {
+    return getAssumed() ? "willreturn" : "may-noreturn";
+  }
+};
+
+struct AAWillReturnFunction final : AAWillReturnImpl {
+
+  /// See AbstractAttribute::AbstractAttribute(...).
+  AAWillReturnFunction(Function &F, InformationCache &InfoCache)
+      : AAWillReturnImpl(F, InfoCache) {}
+
+  /// See AbstractAttribute::getManifestPosition().
+  ManifestPosition getManifestPosition() const override {
+    return MP_FUNCTION;
+  }
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override;
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override;
+};
+
+// Helper function that checks whether a function has any cycle.
+// TODO: Replace with more efficient code
+bool containsCycle(Function &F) {
+  SmallPtrSet<BasicBlock *, 32> Visited;
+
+  // Traverse BB by dfs and check whether successor is already visited.
+  for (BasicBlock *BB : depth_first(&F)) {
+    Visited.insert(BB);
+    for (auto *SuccBB : successors(BB)) {
+      if (Visited.count(SuccBB))
+        return true;
+    }
+  }
+  return false;
+}
+
+// Helper function that checks whether the function has a loop which might
+// become an endless loop.
+// FIXME: Any cycle is regarded as endless loop for now.
+//        We have to allow some patterns.
+bool containsPossiblyEndlessLoop(Function &F) { return containsCycle(F); }
+
+void AAWillReturnFunction::initialize(Attributor &A) {
+  Function &F = getAnchorScope();
+
+  if (containsPossiblyEndlessLoop(F))
+    indicatePessimisticFixpoint();
+}
+
+ChangeStatus AAWillReturnFunction::updateImpl(Attributor &A) {
+  Function &F = getAnchorScope();
+
+  // The map from instruction opcodes to those instructions in the function.
+  auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
+
+  for (unsigned Opcode :
+       {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
+        (unsigned)Instruction::Call}) {
+    for (Instruction *I : OpcodeInstMap[Opcode]) {
+      auto ICS = ImmutableCallSite(I);
+
+      if (ICS.hasFnAttr(Attribute::WillReturn))
+        continue;
+
+      auto *WillReturnAA = A.getAAFor<AAWillReturn>(*this, *I);
+      if (!WillReturnAA || !WillReturnAA->isAssumedWillReturn()) {
+        indicatePessimisticFixpoint();
+        return ChangeStatus::CHANGED;
+      }
+
+      auto *NoRecurseAA = A.getAAFor<AANoRecurse>(*this, *I);
+
+      // FIXME: (i) Prohibit any recursion for now.
+      //        (ii) AANoRecurse isn't implemented yet so currently any call is
+      //        regarded as having recursion.
+      //       Code below should be
+      //       if ((!NoRecurseAA || !NoRecurseAA->isAssumedNoRecurse()) &&
+      if (!NoRecurseAA && !ICS.hasFnAttr(Attribute::NoRecurse)) {
+        indicatePessimisticFixpoint();
+        return ChangeStatus::CHANGED;
+      }
+    }
+  }
+
+  return ChangeStatus::UNCHANGED;
+}
+
 /// ----------------------------------------------------------------------------
 ///                               Attributor
 /// ----------------------------------------------------------------------------
@@ -1403,6 +1520,9 @@ void Attributor::identifyDefaultAbstractAttributes(
       registerAA(*new AANonNullArgument(Arg, InfoCache));
   }
 
+  // Every function might be "will-return".
+  registerAA(*new AAWillReturnFunction(F, InfoCache));
+
   // Walk all instructions to find more attribute opportunities and also
   // interesting instructions that might be queried by abstract attributes
   // during their initialization or update.
diff --git a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll
index fa2aee85c96b7..a5699fc1fc09d 100644
--- a/llvm/test/Transforms/FunctionAttrs/arg_returned.ll
+++ b/llvm/test/Transforms/FunctionAttrs/arg_returned.ll
@@ -744,7 +744,7 @@ unreachableblock2:
 attributes #0 = { noinline nounwind uwtable }
 
 ; BOTH-NOT: attributes #
-; BOTH-DAG: attributes #{{[0-9]*}} = { nofree noinline norecurse nosync nounwind readnone uwtable }
+; BOTH-DAG: attributes #{{[0-9]*}} = { nofree noinline norecurse nosync nounwind readnone uwtable willreturn }
 ; BOTH-DAG: attributes #{{[0-9]*}} = { nofree noinline nosync nounwind readnone uwtable }
 ; BOTH-DAG: attributes #{{[0-9]*}} = { nofree noinline nosync nounwind readonly uwtable }
 ; BOTH-DAG: attributes #{{[0-9]*}} = { noinline nounwind uwtable }
diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn.ll b/llvm/test/Transforms/FunctionAttrs/willreturn.ll
index b9a3cf926abde..e2b76b156ea2d 100644
--- a/llvm/test/Transforms/FunctionAttrs/willreturn.ll
+++ b/llvm/test/Transforms/FunctionAttrs/willreturn.ll
@@ -1,4 +1,6 @@
 ; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR
+; RUN: opt -attributor --attributor-disable=false -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -7,9 +9,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 
 ; TEST 1 (positive case)
-; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable
 ; FNATTR-NEXT: define void @only_return()
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable willreturn
+; ATTRIBUTOR-NEXT: define void @only_return()
 define void @only_return() #0 {
     ret void
 }
@@ -22,9 +25,11 @@ define void @only_return() #0 {
 ;    return n<=1? n : fib(n-1) + fib(n-2);
 ; }
 
-; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline nounwind readnone uwtable
 ; FNATTR-NEXT: define i32 @fib(i32)
+; FIXME: missing willreturn
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable
+; ATTRIBUTOR-NEXT: define i32 @fib(i32) local_unnamed_addr
 define i32 @fib(i32) local_unnamed_addr #0 {
   %2 = icmp slt i32 %0, 2
   br i1 %2, label %9, label %3
@@ -54,6 +59,9 @@ define i32 @fib(i32) local_unnamed_addr #0 {
 ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define i32 @fact_maybe_not_halt(i32) local_unnamed_addr
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define i32 @fact_maybe_not_halt(i32) local_unnamed_addr
 define i32 @fact_maybe_not_halt(i32) local_unnamed_addr #0 {
   %2 = icmp eq i32 %0, 0
   br i1 %2, label %11, label %3
@@ -87,6 +95,8 @@ define i32 @fact_maybe_not_halt(i32) local_unnamed_addr #0 {
 ; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable
 ; FNATTR-NEXT: define i32 @fact_loop(i32)
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable
+; ATTRIBUTOR-NEXT: define i32 @fact_loop(i32) local_unnamed_addr
 define i32 @fact_loop(i32) local_unnamed_addr #0 {
   %2 = icmp slt i32 %0, 1
   br i1 %2, label %3, label %5
@@ -116,6 +126,9 @@ define i32 @fact_loop(i32) local_unnamed_addr #0 {
 ; FNATTR: Function Attrs: noinline nounwind readnone uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define void @mutual_recursion1()
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @mutual_recursion1()
 define void @mutual_recursion1() #0 {
   call void @mutual_recursion2()
   ret void
@@ -125,6 +138,9 @@ define void @mutual_recursion1() #0 {
 ; FNATTR: Function Attrs: noinline nounwind readnone uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define void @mutual_recursion2()
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @mutual_recursion2()
 define void @mutual_recursion2() #0 {
   call void @mutual_recursion1()
   ret void
@@ -135,11 +151,16 @@ define void @mutual_recursion2() #0 {
 ; call exit/abort (has noreturn attribute)
 ; FNATTR: Function Attrs: noreturn
 ; FNATTR-NEXT: declare void @exit(i32) local_unnamed_addr
+; ATTRIBUTOR: Function Attrs: noreturn
+; ATTRIBUTOR-NEXT: declare void @exit(i32) local_unnamed_addr
 declare void @exit(i32) local_unnamed_addr noreturn
 
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define void @only_exit()
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @only_exit() local_unnamed_addr
 define void @only_exit() local_unnamed_addr #0 {
   tail call void @exit(i32 0)
   unreachable
@@ -158,6 +179,9 @@ define void @only_exit() local_unnamed_addr #0 {
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define void @conditional_exit(i32, i32* nocapture readonly) local_unnamed_addr
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @conditional_exit(i32, i32* nocapture readonly) local_unnamed_addr
 define void @conditional_exit(i32, i32* nocapture readonly) local_unnamed_addr #0 {
   %3 = icmp eq i32 %0, 0
   br i1 %3, label %5, label %4
@@ -181,13 +205,18 @@ define void @conditional_exit(i32, i32* nocapture readonly) local_unnamed_addr #
 
 ; TEST 6 (positive case)
 ; Call intrinsic function
+; FIXME: missing willreturn
 ; FNATTRS: Function Attrs: noinline readnone speculatable
 ; FNATTRS-NEXT: declare float @llvm.floor.f32(float)
+; ATTRIBUTOR: Function Attrs: nounwind readnone speculatable
+; ATTRIBUTOR-NEXT: declare float @llvm.floor.f32(float)
 declare float @llvm.floor.f32(float)
 
-; FIXME: missing willreturn
 ; FNATTRS: Function Attrs: noinline nounwind uwtable
 ; FNATTRS-NEXT: define void @call_floor(float %a)
+; FIXME: missing willreturn
+; ATTRIBUTOR: Function Attrs: noinline nosync nounwind uwtable
+; ATTRIBUTOR-NEXT: define void @call_floor(float %a)
 define void @call_floor(float %a) #0 {
     tail call float @llvm.floor.f32(float %a)
     ret void
@@ -200,11 +229,17 @@ define void @call_floor(float %a) #0 {
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: declare void @maybe_noreturn()
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: declare void @maybe_noreturn()
 declare void @maybe_noreturn() #0
 
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define void @call_maybe_noreturn()
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @call_maybe_noreturn()
 define void @call_maybe_noreturn() #0 {
     tail call void @maybe_noreturn()
     ret void
@@ -216,11 +251,15 @@ define void @call_maybe_noreturn() #0 {
 
 ; FNATTR: Function Attrs: willreturn
 ; FNATTR-NEXT: declare void @will_return()
+; ATTRIBUTOR: Function Attrs: willreturn
+; ATTRIBUTOR-NEXT: declare void @will_return()
 declare void @will_return() willreturn
 
 ; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NEXT: define void @f1()
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NEXT: define void @f1()
 define void @f1() #0 {
     tail call void @will_return()
     ret void
@@ -229,6 +268,9 @@ define void @f1() #0 {
 ; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NEXT: define void @f2()
+; FIXME: missing willreturn
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NEXT: define void @f2()
 define void @f2() #0 {
     tail call void @f1()
     ret void
@@ -241,6 +283,9 @@ define void @f2() #0 {
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define void @call_will_return_but_has_loop()
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @call_will_return_but_has_loop()
 define void @call_will_return_but_has_loop() #0 {
   br label %label1
 label1:
@@ -256,14 +301,17 @@ label2:
 
 ; FNATTR: Function Attrs: noinline uwtable willreturn
 ; FNATTR-NEXT: declare i1 @maybe_raise_exception()
+; ATTRIBUTOR: Function Attrs: noinline uwtable willreturn
+; ATTRIBUTOR-NEXT: declare i1 @maybe_raise_exception()
 declare i1 @maybe_raise_exception() #1 willreturn
 
-; FIXME: missing willreturn
 ; FNATTR: Function Attrs: nounwind
 ; FNATTR-NEXT: define void @invoke_test()
+; ATTRIBUTOR: Function Attrs: nounwind willreturn
+; ATTRIBUTOR-NEXT: define void @invoke_test()
 define void @invoke_test() personality i32 (...)* @__gxx_personality_v0 {
   invoke i1 @maybe_raise_exception()
-			to label %N unwind label %F
+      to label %N unwind label %F
   N:
     ret void
   F:
@@ -288,6 +336,8 @@ declare i32 @__gxx_personality_v0(...)
 ; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline norecurse nounwind readonly uwtable
 ; FNATTR-NEXT: define i32 @loop_constant_trip_count(i32* nocapture readonly)
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable
+; ATTRIBUTOR-NEXT: define i32 @loop_constant_trip_count(i32* nocapture readonly)
 define i32 @loop_constant_trip_count(i32* nocapture readonly) #0 {
   br label %3
 
@@ -319,6 +369,9 @@ define i32 @loop_constant_trip_count(i32* nocapture readonly) #0 {
 ; FNATTR: Function Attrs: noinline norecurse nounwind readonly uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define i32 @loop_trip_count_unbound(i32, i32, i32* nocapture readonly, i32) local_unnamed_addr
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define i32 @loop_trip_count_unbound(i32, i32, i32* nocapture readonly, i32) local_unnamed_addr
 define i32 @loop_trip_count_unbound(i32, i32, i32* nocapture readonly, i32) local_unnamed_addr #0 {
   %5 = icmp eq i32 %0, %1
   br i1 %5, label %6, label %8
@@ -354,6 +407,8 @@ define i32 @loop_trip_count_unbound(i32, i32, i32* nocapture readonly, i32) loca
 ; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline norecurse nounwind readonly uwtable
 ; FNATTR-NEXT: define i32 @loop_trip_dec(i32, i32* nocapture readonly)
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable
+; ATTRIBUTOR-NEXT: define i32 @loop_trip_dec(i32, i32* nocapture readonly) local_unnamed_addr
 
 define i32 @loop_trip_dec(i32, i32* nocapture readonly) local_unnamed_addr #0 {
   %3 = icmp sgt i32 %0, -1
@@ -381,9 +436,10 @@ define i32 @loop_trip_dec(i32, i32* nocapture readonly) local_unnamed_addr #0 {
 ; TEST 14 (positive case)
 ; multiple return
 
-; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline norecurse nounwind readnone uwtable
 ; FNATTR-NEXT: define i32 @multiple_return(i32 %a)
+; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind uwtable willreturn
+; ATTRIBUTOR-NEXT: define i32 @multiple_return(i32 %a)
 define i32 @multiple_return(i32 %a) #0 {
   %b =  icmp eq i32 %a, 0
   br i1 %b, label %t, label %f
@@ -401,6 +457,8 @@ f:
 ; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NEXT: define void @unreachable_exit_positive1()
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NEXT: define void @unreachable_exit_positive1()
 define void @unreachable_exit_positive1() #0 {
   tail call void @will_return()
   ret void
@@ -413,6 +471,8 @@ unreachable_label:
 ; FIXME: missing willreturn
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NEXT: define i32 @unreachable_exit_positive2(i32)
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NEXT: define i32 @unreachable_exit_positive2(i32)
 define i32 @unreachable_exit_positive2(i32) local_unnamed_addr #0 {
   %2 = icmp slt i32 %0, 1
   br i1 %2, label %3, label %5
@@ -440,6 +500,9 @@ unreachable_label:
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define void @unreachable_exit_negative1()
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @unreachable_exit_negative1()
 define void @unreachable_exit_negative1() #0 {
   tail call void @exit(i32 0)
   ret void
@@ -452,6 +515,9 @@ unreachable_label:
 ; FNATTR: Function Attrs: noinline nounwind uwtable
 ; FNATTR-NOT: willreturn
 ; FNATTR-NEXT: define void @unreachable_exit_negative2()
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @unreachable_exit_negative2()
 define void @unreachable_exit_negative2() #0 {
 
   br label %L1
@@ -465,6 +531,23 @@ unreachable_label:
   unreachable
 }
 
+; FNATTR: Function Attrs: noreturn nounwind
+; FNATTR-NEXT: declare void @llvm.eh.sjlj.longjmp(i8*)
+; ATTRIBUTOR: Function Attrs: noreturn nounwind
+; ATTRIBUTOR-NEXT: declare void @llvm.eh.sjlj.longjmp(i8*)
+declare void @llvm.eh.sjlj.longjmp(i8*)
+
+; FNATTR: Function Attrs: noinline nounwind uwtable
+; FNATTR-NOT: willreturn
+; FNATTR-NEXT: define void @call_longjmp(i8* nocapture readnone) local_unnamed_addr #3 {
+; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable
+; ATTRIBUTOR-NOT: willreturn
+; ATTRIBUTOR-NEXT: define void @call_longjmp(i8* nocapture readnone) local_unnamed_addr
+define void @call_longjmp(i8* nocapture readnone) local_unnamed_addr #0 {
+  tail call void @llvm.eh.sjlj.longjmp(i8* %0)
+  ret void
+}
+
 
 attributes #0 = { nounwind uwtable noinline }
 attributes #1 = { uwtable noinline }
diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h
index 9dcd136463cc7..7b74bb07d6e01 100644
--- a/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -123,6 +123,9 @@ struct CodeGenIntrinsic {
   /// True if the intrinsic is no-return.
   bool isNoReturn;
 
+  /// True if the intrinsic is will-return.
+  bool isWillReturn;
+
   /// True if the intrinsic is cold.
   bool isCold;
 
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 702317283f908..b65e1b6af7919 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -557,6 +557,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
   isCommutative = false;
   canThrow = false;
   isNoReturn = false;
+  isWillReturn = false;
   isCold = false;
   isNoDuplicate = false;
   isConvergent = false;
@@ -721,6 +722,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
       isConvergent = true;
     else if (Property->getName() == "IntrNoReturn")
       isNoReturn = true;
+    else if (Property->getName() == "IntrWillReturn")
+      isWillReturn = true;
     else if (Property->getName() == "IntrCold")
       isCold = true;
     else if (Property->getName() == "IntrSpeculatable")
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 6bcdc3d777a2d..979af98f67682 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -545,6 +545,9 @@ struct AttributeComparator {
     if (L->isNoReturn != R->isNoReturn)
       return R->isNoReturn;
 
+    if (L->isWillReturn != R->isWillReturn)
+      return R->isWillReturn;
+
     if (L->isCold != R->isCold)
       return R->isCold;
 
@@ -686,8 +689,9 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
 
     if (!intrinsic.canThrow ||
         (intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem && !intrinsic.hasSideEffects) ||
-        intrinsic.isNoReturn || intrinsic.isCold || intrinsic.isNoDuplicate ||
-        intrinsic.isConvergent || intrinsic.isSpeculatable) {
+        intrinsic.isNoReturn || intrinsic.isWillReturn || intrinsic.isCold ||
+        intrinsic.isNoDuplicate || intrinsic.isConvergent ||
+        intrinsic.isSpeculatable) {
       OS << "      const Attribute::AttrKind Atts[] = {";
       bool addComma = false;
       if (!intrinsic.canThrow) {
@@ -700,6 +704,12 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
         OS << "Attribute::NoReturn";
         addComma = true;
       }
+      if (intrinsic.isWillReturn) {
+        if (addComma)
+          OS << ",";
+        OS << "Attribute::WillReturn";
+        addComma = true;
+      }
       if (intrinsic.isCold) {
         if (addComma)
           OS << ",";

From 552c2c09d354a3ad9c1c9647e0a3bb5099c31088 Mon Sep 17 00:00:00 2001
From: Mike Rice <michael.p.rice@intel.com>
Date: Wed, 17 Jul 2019 15:18:45 +0000
Subject: [PATCH 354/451] [OPENMP]Fix crash in LoopCounterRefChecker when
 MemberExpr is not Var or Field

checkDecl is only valid for VarDecls or FieldDecls, since getCanonicalDecl
expects only these. Prevent other Decl kinds (such as CXXMethodDecls and
EnumConstantDecls) from entering and asserting.

Differential Revision: https://reviews.llvm.org/D64842

llvm-svn: 366336
---
 clang/lib/Sema/SemaOpenMP.cpp           | 3 ++-
 clang/test/OpenMP/for_loop_messages.cpp | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 222d042b6da59..bd68011c18b23 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4992,7 +4992,8 @@ class LoopCounterRefChecker final
   bool VisitMemberExpr(const MemberExpr *E) {
     if (isa<CXXThisExpr>(E->getBase()->IgnoreParens())) {
       const ValueDecl *VD = E->getMemberDecl();
-      return checkDecl(E, VD);
+      if (isa<VarDecl>(VD) || isa<FieldDecl>(VD))
+        return checkDecl(E, VD);
     }
     return false;
   }
diff --git a/clang/test/OpenMP/for_loop_messages.cpp b/clang/test/OpenMP/for_loop_messages.cpp
index 7c2663f61816f..f5f6d0b703136 100644
--- a/clang/test/OpenMP/for_loop_messages.cpp
+++ b/clang/test/OpenMP/for_loop_messages.cpp
@@ -626,6 +626,8 @@ template <typename IT, int ST>
 class TC {
   int ii, iii, kk;
 public:
+  enum { myconstant = 42 };
+  int ub();
   int dotest_lt(IT begin, IT end) {
 #pragma omp parallel
 // expected-error@+3 3 {{the loop initializer expression depends on the current loop control variable}}
@@ -634,6 +636,12 @@ class TC {
   for (ii = ii * 10 + 25; ii < ii / ii - 23; ii += 1)
     ;
 
+// Check that member function calls and enum constants in the condition are
+// handled.
+#pragma omp for
+  for (ii = 0; ii < ub() + this->myconstant; ii += 1) // expected-no-error
+    ;
+
 #pragma omp parallel
 // expected-error@+4 2 {{expected loop invariant expression or '<invariant1> * ii + <invariant2>' kind of expression}}
 // expected-error@+3 {{expected loop invariant expression or '<invariant1> * TC::ii + <invariant2>' kind of expression}}

From f81ee439a409973279244f093dd0264592b74a38 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov@google.com>
Date: Wed, 17 Jul 2019 15:22:14 +0000
Subject: [PATCH 355/451] [clang-tidy] Adjust location of namespace comment
 diagnostic

Summary:
If there is no comment, place it at the closing brace of a namespace
definition. Previously it was placed at the next character after the
closing brace.

The new position produces a better location for highlighting in clangd
and does not seem to make matters worse for clang-tidy.

Reviewers: alexfh, hokein

Reviewed By: alexfh, hokein

Subscribers: xazax.hun, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64861

llvm-svn: 366337
---
 .../clang-tidy/readability/NamespaceCommentCheck.cpp     | 9 ++++++++-
 .../clang-tidy/google-readability-namespace-comments.cpp | 6 +++---
 .../google-readability-nested-namespace-comments.cpp     | 4 ++--
 clang-tools-extra/test/clang-tidy/select-checks.cpp      | 2 +-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp
index 6428f8cdc9893..eb3d7c505b831 100644
--- a/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.cpp
@@ -9,6 +9,7 @@
 #include "NamespaceCommentCheck.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/StringExtras.h"
 
@@ -181,7 +182,13 @@ void NamespaceCommentCheck::check(const MatchFinder::MatchResult &Result) {
           ? "anonymous namespace"
           : ("namespace '" + NestedNamespaceName.str() + "'");
 
-  diag(AfterRBrace, Message)
+  // Place diagnostic at an old comment, or closing brace if we did not have it.
+  SourceLocation DiagLoc =
+      OldCommentRange.getBegin() != OldCommentRange.getEnd()
+          ? OldCommentRange.getBegin()
+          : ND->getRBraceLoc();
+
+  diag(DiagLoc, Message)
       << NamespaceName
       << FixItHint::CreateReplacement(
              CharSourceRange::getCharRange(OldCommentRange),
diff --git a/clang-tools-extra/test/clang-tidy/google-readability-namespace-comments.cpp b/clang-tools-extra/test/clang-tidy/google-readability-namespace-comments.cpp
index 9abb984bcbf79..591c9dae5a74e 100644
--- a/clang-tools-extra/test/clang-tidy/google-readability-namespace-comments.cpp
+++ b/clang-tools-extra/test/clang-tidy/google-readability-namespace-comments.cpp
@@ -7,9 +7,9 @@ namespace n2 {
 void f(); // So that the namespace isn't empty.
 
 
-// CHECK-MESSAGES: :[[@LINE+4]]:2: warning: namespace 'n2' not terminated with a closing comment [google-readability-namespace-comments]
+// CHECK-MESSAGES: :[[@LINE+4]]:1: warning: namespace 'n2' not terminated with a closing comment [google-readability-namespace-comments]
 // CHECK-MESSAGES: :[[@LINE-7]]:11: note: namespace 'n2' starts here
-// CHECK-MESSAGES: :[[@LINE+2]]:3: warning: namespace 'n1' not terminated with
+// CHECK-MESSAGES: :[[@LINE+2]]:2: warning: namespace 'n1' not terminated with
 // CHECK-MESSAGES: :[[@LINE-10]]:11: note: namespace 'n1' starts here
 }}
 // CHECK-FIXES: }  // namespace n2
@@ -25,7 +25,7 @@ void f(); // So that the namespace isn't empty.
 // 5
 // 6
 // 7
-// CHECK-MESSAGES: :[[@LINE+2]]:2: warning: namespace 'macro_expansion' not terminated with
+// CHECK-MESSAGES: :[[@LINE+2]]:1: warning: namespace 'macro_expansion' not terminated with
 // CHECK-MESSAGES: :[[@LINE-10]]:11: note: namespace 'macro_expansion' starts here
 }
 // CHECK-FIXES: }  // namespace macro_expansion
diff --git a/clang-tools-extra/test/clang-tidy/google-readability-nested-namespace-comments.cpp b/clang-tools-extra/test/clang-tidy/google-readability-nested-namespace-comments.cpp
index d7765c63820d1..017081d2e9520 100644
--- a/clang-tools-extra/test/clang-tidy/google-readability-nested-namespace-comments.cpp
+++ b/clang-tools-extra/test/clang-tidy/google-readability-nested-namespace-comments.cpp
@@ -7,9 +7,9 @@ namespace n3 {
 void f();
 
 
-// CHECK-MESSAGES: :[[@LINE+4]]:2: warning: namespace 'n3' not terminated with
+// CHECK-MESSAGES: :[[@LINE+4]]:1: warning: namespace 'n3' not terminated with
 // CHECK-MESSAGES: :[[@LINE-7]]:11: note: namespace 'n3' starts here
-// CHECK-MESSAGES: :[[@LINE+2]]:3: warning: namespace 'n1::n2' not terminated with a closing comment [google-readability-namespace-comments]
+// CHECK-MESSAGES: :[[@LINE+2]]:2: warning: namespace 'n1::n2' not terminated with a closing comment [google-readability-namespace-comments]
 // CHECK-MESSAGES: :[[@LINE-10]]:11: note: namespace 'n1::n2' starts here
 }}
 // CHECK-FIXES: }  // namespace n3
diff --git a/clang-tools-extra/test/clang-tidy/select-checks.cpp b/clang-tools-extra/test/clang-tidy/select-checks.cpp
index 791def75b18bc..46bf43ca0c3d9 100644
--- a/clang-tools-extra/test/clang-tidy/select-checks.cpp
+++ b/clang-tools-extra/test/clang-tidy/select-checks.cpp
@@ -5,7 +5,7 @@
 
 namespace i {
 }
-// CHECK: :[[@LINE-1]]:2: warning: namespace 'i' not terminated with a closing comment [llvm-namespace-comment]
+// CHECK: :[[@LINE-1]]:1: warning: namespace 'i' not terminated with a closing comment [llvm-namespace-comment]
 
 // Expect no warnings from the google-explicit-constructor check:
 class A { A(int i); };

From e61d0257ede2bf597440f516ecd9ff1146bf7b06 Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976@hotmail.com>
Date: Wed, 17 Jul 2019 15:26:49 +0000
Subject: [PATCH 356/451] [clangd] Type hierarchy: don't resolve parents if the
 client only asked for children

Summary: Also reorganize the code for computing supertypes to make it more symmetric to subtypes.

Reviewers: kadircet

Reviewed By: kadircet

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64613

llvm-svn: 366338
---
 clang-tools-extra/clangd/XRefs.cpp            | 33 ++++++++++---------
 .../clangd/unittests/TypeHierarchyTests.cpp   |  3 +-
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 59f07ee405eed..6339f8643f745 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -1132,15 +1132,9 @@ static void fillSubTypes(const SymbolID &ID,
 
 using RecursionProtectionSet = llvm::SmallSet<const CXXRecordDecl *, 4>;
 
-static Optional<TypeHierarchyItem>
-getTypeAncestors(const CXXRecordDecl &CXXRD, ASTContext &ASTCtx,
-                 RecursionProtectionSet &RPSet) {
-  Optional<TypeHierarchyItem> Result = declToTypeHierarchyItem(ASTCtx, CXXRD);
-  if (!Result)
-    return Result;
-
-  Result->parents.emplace();
-
+static void fillSuperTypes(const CXXRecordDecl &CXXRD, ASTContext &ASTCtx,
+                           std::vector<TypeHierarchyItem> &SuperTypes,
+                           RecursionProtectionSet &RPSet) {
   // typeParents() will replace dependent template specializations
   // with their class template, so to avoid infinite recursion for
   // certain types of hierarchies, keep the templates encountered
@@ -1149,22 +1143,22 @@ getTypeAncestors(const CXXRecordDecl &CXXRD, ASTContext &ASTCtx,
   auto *Pattern = CXXRD.getDescribedTemplate() ? &CXXRD : nullptr;
   if (Pattern) {
     if (!RPSet.insert(Pattern).second) {
-      return Result;
+      return;
     }
   }
 
   for (const CXXRecordDecl *ParentDecl : typeParents(&CXXRD)) {
     if (Optional<TypeHierarchyItem> ParentSym =
-            getTypeAncestors(*ParentDecl, ASTCtx, RPSet)) {
-      Result->parents->emplace_back(std::move(*ParentSym));
+            declToTypeHierarchyItem(ASTCtx, *ParentDecl)) {
+      ParentSym->parents.emplace();
+      fillSuperTypes(*ParentDecl, ASTCtx, *ParentSym->parents, RPSet);
+      SuperTypes.emplace_back(std::move(*ParentSym));
     }
   }
 
   if (Pattern) {
     RPSet.erase(Pattern);
   }
-
-  return Result;
 }
 
 const CXXRecordDecl *findRecordTypeAt(ParsedAST &AST, Position Pos) {
@@ -1231,12 +1225,19 @@ getTypeHierarchy(ParsedAST &AST, Position Pos, int ResolveLevels,
   if (!CXXRD)
     return llvm::None;
 
-  RecursionProtectionSet RPSet;
   Optional<TypeHierarchyItem> Result =
-      getTypeAncestors(*CXXRD, AST.getASTContext(), RPSet);
+      declToTypeHierarchyItem(AST.getASTContext(), *CXXRD);
   if (!Result)
     return Result;
 
+  if (Direction == TypeHierarchyDirection::Parents ||
+      Direction == TypeHierarchyDirection::Both) {
+    Result->parents.emplace();
+
+    RecursionProtectionSet RPSet;
+    fillSuperTypes(*CXXRD, AST.getASTContext(), *Result->parents, RPSet);
+  }
+
   if ((Direction == TypeHierarchyDirection::Children ||
        Direction == TypeHierarchyDirection::Both) &&
       ResolveLevels > 0) {
diff --git a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp
index 633a25fe3b442..cebfa8dfbeb93 100644
--- a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp
@@ -630,7 +630,8 @@ struct Child2b : Child1 {};
   ASSERT_TRUE(bool(Result));
   EXPECT_THAT(
       *Result,
-      AllOf(WithName("Parent"), WithKind(SymbolKind::Struct), Parents(),
+      AllOf(WithName("Parent"), WithKind(SymbolKind::Struct),
+            ParentsNotResolved(),
             Children(AllOf(WithName("Child1"), WithKind(SymbolKind::Struct),
                            ParentsNotResolved(), ChildrenNotResolved()))));
 

From 0730710b3fc44b908b1fb4b1e91cbd87d0c1209f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 17 Jul 2019 15:31:53 +0000
Subject: [PATCH 357/451] Fix MSVC 'implicitly capture' compile error. NFCI.

llvm-svn: 366339
---
 clang-tools-extra/clangd/QueryDriverDatabase.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/QueryDriverDatabase.cpp
index 110b8fc43fc2b..bdb928bde352c 100644
--- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp
+++ b/clang-tools-extra/clangd/QueryDriverDatabase.cpp
@@ -65,7 +65,7 @@ std::vector<std::string> parseDriverOutput(llvm::StringRef Output) {
 
   auto StartIt =
       std::find_if(Lines.begin(), Lines.end(),
-                   [](llvm::StringRef Line) { return Line.trim() == SIS; });
+                   [SIS](llvm::StringRef Line) { return Line.trim() == SIS; });
   if (StartIt == Lines.end()) {
     elog("System include extraction: start marker not found: {0}", Output);
     return {};

From 06eed4221374276691e191188aaa9d27bb90adde Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 17 Jul 2019 15:35:36 +0000
Subject: [PATCH 358/451] AMDGPU: Use getTargetConstant

Avoids creating an extra intermediate mov.

llvm-svn: 366340
---
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 ++--
 llvm/test/CodeGen/AMDGPU/shift-i128.ll        | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 9af01a73030d2..ea730539f8340 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -620,10 +620,10 @@ MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
                                                   EVT VT) const {
   SDNode *Lo = CurDAG->getMachineNode(
       AMDGPU::S_MOV_B32, DL, MVT::i32,
-      CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
+      CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
   SDNode *Hi =
       CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
-                             CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
+                             CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
   const SDValue Ops[] = {
       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
index dbf2da6dedb6a..9fb3ede34844b 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -147,13 +147,13 @@ define i128 @v_lshr_i128_kv(i128 %rhs) {
 ; GCN-LABEL: v_lshr_i128_kv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s5, 0
-; GCN-NEXT:    s_movk_i32 s4, 0x41
-; GCN-NEXT:    v_lshr_b64 v[2:3], s[4:5], v0
+; GCN-NEXT:    s_mov_b32 s7, 0
+; GCN-NEXT:    s_movk_i32 s6, 0x41
+; GCN-NEXT:    v_lshr_b64 v[2:3], s[6:7], v0
 ; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v2, vcc
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x41
+; GCN-NEXT:    v_mov_b32_e32 v2, s6
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0

From aa5cdafa405e32956226dcc1cacd86430ac9ad1a Mon Sep 17 00:00:00 2001
From: Jonathan Peyton <jonathan.l.peyton@intel.com>
Date: Wed, 17 Jul 2019 15:41:00 +0000
Subject: [PATCH 359/451] Remove REQUIRES OMP spec version within lit tests

This is a follow-up patch to D64534 (r365963) which removed all OMP
spec versioning within the OpenMP runtime codebase.  This patch removes
REQUIRES: openmp-x.y lines from lit tests.

llvm-svn: 366341
---
 openmp/runtime/test/affinity/bug-nested.c                        | 1 -
 openmp/runtime/test/tasking/bug_nested_proxy_task.c              | 1 -
 openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c         | 1 -
 openmp/runtime/test/tasking/kmp_taskloop.c                       | 1 -
 openmp/runtime/test/tasking/omp_task_priority.c                  | 1 -
 openmp/runtime/test/tasking/omp_taskloop_grainsize.c             | 1 -
 openmp/runtime/test/tasking/omp_taskloop_num_tasks.c             | 1 -
 openmp/runtime/test/worksharing/for/kmp_doacross_check.c         | 1 -
 openmp/runtime/test/worksharing/for/kmp_sch_simd_guided.c        | 1 -
 openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c   | 1 -
 .../runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c   | 1 -
 .../runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c   | 1 -
 openmp/runtime/test/worksharing/for/omp_doacross.c               | 1 -
 13 files changed, 13 deletions(-)

diff --git a/openmp/runtime/test/affinity/bug-nested.c b/openmp/runtime/test/affinity/bug-nested.c
index a81b5f36292bd..4396182cbb47e 100644
--- a/openmp/runtime/test/affinity/bug-nested.c
+++ b/openmp/runtime/test/affinity/bug-nested.c
@@ -1,5 +1,4 @@
 // RUN: %libomp-compile && env KMP_AFFINITY=compact %libomp-run
-// REQUIRES: openmp-4.0
 
 #include <stdio.h>
 #include <stdint.h>
diff --git a/openmp/runtime/test/tasking/bug_nested_proxy_task.c b/openmp/runtime/test/tasking/bug_nested_proxy_task.c
index 3f0a01542d4e6..f70e9044a63bf 100644
--- a/openmp/runtime/test/tasking/bug_nested_proxy_task.c
+++ b/openmp/runtime/test/tasking/bug_nested_proxy_task.c
@@ -1,5 +1,4 @@
 // RUN: %libomp-compile-and-run
-// REQUIRES: openmp-4.5
 // The runtime currently does not get dependency information from GCC.
 // UNSUPPORTED: gcc
 
diff --git a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
index 03e18a6a6e606..c07f399d2b43e 100644
--- a/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
+++ b/openmp/runtime/test/tasking/bug_proxy_task_dep_waiting.c
@@ -1,5 +1,4 @@
 // RUN: %libomp-compile-and-run
-// REQUIRES: openmp-4.5
 // The runtime currently does not get dependency information from GCC.
 // UNSUPPORTED: gcc
 
diff --git a/openmp/runtime/test/tasking/kmp_taskloop.c b/openmp/runtime/test/tasking/kmp_taskloop.c
index 359f7a4ae916d..4b137933bb014 100644
--- a/openmp/runtime/test/tasking/kmp_taskloop.c
+++ b/openmp/runtime/test/tasking/kmp_taskloop.c
@@ -1,6 +1,5 @@
 // RUN: %libomp-compile-and-run
 // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
-// REQUIRES: openmp-4.5
 #include <stdio.h>
 #include <omp.h>
 #include "omp_my_sleep.h"
diff --git a/openmp/runtime/test/tasking/omp_task_priority.c b/openmp/runtime/test/tasking/omp_task_priority.c
index 6acb4a804e8ed..7b62360173341 100644
--- a/openmp/runtime/test/tasking/omp_task_priority.c
+++ b/openmp/runtime/test/tasking/omp_task_priority.c
@@ -1,5 +1,4 @@
 // RUN: %libomp-compile && env OMP_MAX_TASK_PRIORITY=42 %libomp-run
-// REQUIRES: openmp-4.5
 // Test OMP 4.5 task priorities
 // Currently only API function and envirable parsing implemented.
 // Test environment sets envirable: OMP_MAX_TASK_PRIORITY=42 as tested below.
diff --git a/openmp/runtime/test/tasking/omp_taskloop_grainsize.c b/openmp/runtime/test/tasking/omp_taskloop_grainsize.c
index c5756a481a185..0833073efb247 100644
--- a/openmp/runtime/test/tasking/omp_taskloop_grainsize.c
+++ b/openmp/runtime/test/tasking/omp_taskloop_grainsize.c
@@ -1,6 +1,5 @@
 // RUN: %libomp-compile-and-run
 // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
-// REQUIRES: openmp-4.5
 
 // These compilers don't support the taskloop construct
 // UNSUPPORTED: gcc-4, gcc-5, icc-16
diff --git a/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c b/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c
index bed2c5b207edc..75cc337aad914 100644
--- a/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c
+++ b/openmp/runtime/test/tasking/omp_taskloop_num_tasks.c
@@ -2,7 +2,6 @@
 // UNSUPPORTED: netbsd
 // RUN: %libomp-compile-and-run
 // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
-// REQUIRES: openmp-4.5
 
 // These compilers don't support the taskloop construct
 // UNSUPPORTED: gcc-4, gcc-5, icc-16
diff --git a/openmp/runtime/test/worksharing/for/kmp_doacross_check.c b/openmp/runtime/test/worksharing/for/kmp_doacross_check.c
index 4eea328833a8b..59b61e32eb16a 100644
--- a/openmp/runtime/test/worksharing/for/kmp_doacross_check.c
+++ b/openmp/runtime/test/worksharing/for/kmp_doacross_check.c
@@ -1,5 +1,4 @@
 // RUN: %libomp-compile-and-run
-// REQUIRES: openmp-4.5
 // UNSUPPORTED: gcc
 // This test is incompatible with gcc because of the explicit call to
 // __kmpc_doacross_fini().  gcc relies on an implicit call to this function
diff --git a/openmp/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/openmp/runtime/test/worksharing/for/kmp_sch_simd_guided.c
index 6cf5d2ff82244..5c6f94bc729af 100644
--- a/openmp/runtime/test/worksharing/for/kmp_sch_simd_guided.c
+++ b/openmp/runtime/test/worksharing/for/kmp_sch_simd_guided.c
@@ -1,5 +1,4 @@
 // RUN: %libomp-compile-and-run
-// REQUIRES: openmp-4.5
 /*
   Test for the 'schedule(simd:guided)' clause.
   Compiler needs to generate a dynamic dispatching and pass the schedule
diff --git a/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c b/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
index 8b5f34a8c3fdd..987a5c0d45945 100644
--- a/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
+++ b/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
@@ -1,5 +1,4 @@
 // RUN: %libomp-compile-and-run
-// REQUIRES: openmp-4.5
 
 // The test checks schedule(simd:runtime)
 // in combination with omp_set_schedule()
diff --git a/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
index 142e9b3633451..5dfaf24185e76 100644
--- a/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
+++ b/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
@@ -6,7 +6,6 @@
 // RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1
 // RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2
 // RUN: env OMP_SCHEDULE=auto      %libomp-run
-// REQUIRES: openmp-4.5
 
 // The test checks schedule(simd:runtime)
 // in combination with OMP_SCHEDULE=guided[,chunk]
diff --git a/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c b/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
index e2c878fe79f32..d76046bac1f80 100644
--- a/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
+++ b/openmp/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
@@ -1,6 +1,5 @@
 // RUN: %libomp-compile && %libomp-run
 // RUN: %libomp-run 1 && %libomp-run 2
-// REQUIRES: openmp-4.5
 
 // The test checks schedule(simd:runtime)
 // in combination with OMP_SCHEDULE=static[,chunk]
diff --git a/openmp/runtime/test/worksharing/for/omp_doacross.c b/openmp/runtime/test/worksharing/for/omp_doacross.c
index 32e8e82d331de..364430675d647 100644
--- a/openmp/runtime/test/worksharing/for/omp_doacross.c
+++ b/openmp/runtime/test/worksharing/for/omp_doacross.c
@@ -1,5 +1,4 @@
 // RUN: %libomp-compile-and-run
-// REQUIRES: openmp-4.5
 // XFAIL: gcc-4, gcc-5, clang-3.7, clang-3.8, icc-15, icc-16
 #include <stdio.h>
 #include <stdlib.h>

From 9a050f92bb29000a1a961f7c9ca80d7ec13dc310 Mon Sep 17 00:00:00 2001
From: Julian Lettner <jlettner@apple.com>
Date: Wed, 17 Jul 2019 16:09:25 +0000
Subject: [PATCH 360/451] [ASan] Support `{f}puts(NULL)` on Darwin

On Darwin, the man page states that "both fputs() and puts() print
`(null)' if str is NULL."

rdar://48227136

Reviewed By: Lekensteyn

Differential Revision: https://reviews.llvm.org/D64773

llvm-svn: 366342
---
 .../sanitizer_common_interceptors.inc            |  6 ++++--
 .../TestCases/Darwin/fputs_puts_null.cc          | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Darwin/fputs_puts_null.cc

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 5b68c019190a0..9f5a91ac99dc6 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -1241,7 +1241,8 @@ INTERCEPTOR_WITH_SUFFIX(int, fputs, char *s, void *file) {
   // libc file streams can call user-supplied functions, see fopencookie.
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fputs, s, file);
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
+  if (!SANITIZER_MAC || s)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
   return REAL(fputs)(s, file);
 }
 #define INIT_FPUTS COMMON_INTERCEPT_FUNCTION(fputs)
@@ -1254,7 +1255,8 @@ INTERCEPTOR(int, puts, char *s) {
   // libc file streams can call user-supplied functions, see fopencookie.
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
+  if (!SANITIZER_MAC || s)
+    COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
   return REAL(puts)(s);
 }
 #define INIT_PUTS COMMON_INTERCEPT_FUNCTION(puts)
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Darwin/fputs_puts_null.cc b/compiler-rt/test/sanitizer_common/TestCases/Darwin/fputs_puts_null.cc
new file mode 100644
index 0000000000000..705ca2e981cb2
--- /dev/null
+++ b/compiler-rt/test/sanitizer_common/TestCases/Darwin/fputs_puts_null.cc
@@ -0,0 +1,16 @@
+// On Darwin, the man page states that "both fputs() and puts() print `(null)'
+// if str is NULL."
+//
+// RUN: %clangxx -g %s -o %t && %run %t | FileCheck %s
+// CHECK: {{^\(null\)---\(null\)$}}
+
+#include <assert.h>
+#include <stdio.h>
+
+int main(void) {
+  assert(fputs(NULL, stdout) >= 0);
+  fputs("---", stdout);
+  assert(puts(NULL) >= 0);
+
+  return 0;
+}

From 544315b4197034a3be8acd12cba56a75fb1f08dc Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Wed, 17 Jul 2019 16:14:52 +0000
Subject: [PATCH 361/451] [RISCV] Revert r366331 as it exposed some sanitizer
 failures

See <http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/33612>.

llvm-svn: 366343
---
 llvm/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index b8eb19848bc58..acebd3feb8bf8 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -285,7 +285,6 @@ set(LLVM_ALL_TARGETS
   MSP430
   NVPTX
   PowerPC
-  RISCV
   Sparc
   SystemZ
   WebAssembly

From 1716454027dbef52f04dc3abbb708cb73c254318 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames@gmail.com>
Date: Wed, 17 Jul 2019 16:40:52 +0000
Subject: [PATCH 362/451] [ORC] Add deprecation warnings to ORCv1 layers and
 utilities.

Summary:
ORCv1 is deprecated. The current aim is to remove it before the LLVM 10.0
release. This patch adds deprecation attributes to the ORCv1 layers and
utilities to warn clients of the change.

Reviewers: dblaikie, sgraenitz, AlexDenisov

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64609

llvm-svn: 366344
---
 .../BuildingAJIT/Chapter3/KaleidoscopeJIT.h   | 26 ++++----
 .../BuildingAJIT/Chapter4/KaleidoscopeJIT.h   |  7 +-
 .../BuildingAJIT/Chapter5/KaleidoscopeJIT.h   |  7 +-
 .../Kaleidoscope/include/KaleidoscopeJIT.h    |  9 ++-
 .../llvm/ExecutionEngine/ExecutionEngine.h    | 13 +++-
 .../Orc/CompileOnDemandLayer.h                | 45 ++++++++++---
 llvm/include/llvm/ExecutionEngine/Orc/Core.h  |  1 +
 .../llvm/ExecutionEngine/Orc/ExecutionUtils.h | 30 ++++++++-
 .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 20 +++++-
 .../ExecutionEngine/Orc/IRTransformLayer.h    | 19 +++++-
 .../llvm/ExecutionEngine/Orc/LambdaResolver.h | 27 +++++++-
 .../ExecutionEngine/Orc/LazyEmittingLayer.h   | 13 +++-
 .../Orc/ObjectTransformLayer.h                | 16 ++++-
 .../Orc/RTDyldObjectLinkingLayer.h            | 18 ++++--
 .../ExecutionEngine/Orc/RemoteObjectLayer.h   | 53 +++++++++++++--
 .../llvm/ExecutionEngine/OrcV1Deprecation.h   | 22 +++++++
 .../ExecutionEngine/Orc/OrcCBindingsStack.h   | 46 +++++++------
 .../Orc/OrcMCJITReplacement.cpp               |  3 +-
 .../ExecutionEngine/Orc/OrcMCJITReplacement.h | 10 +--
 .../Orc/RTDyldObjectLinkingLayer.cpp          |  9 +++
 llvm/tools/lli/lli.cpp                        | 25 ++++----
 .../Orc/LazyEmittingLayerTest.cpp             |  3 +-
 .../Orc/LegacyCompileOnDemandLayerTest.cpp    |  2 +-
 .../LegacyRTDyldObjectLinkingLayerTest.cpp    | 35 +++++-----
 .../Orc/ObjectTransformLayerTest.cpp          | 33 ++++++----
 .../Orc/RemoteObjectLayerTest.cpp             | 64 +++++++++----------
 26 files changed, 400 insertions(+), 156 deletions(-)
 create mode 100644 llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h

diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h
index 717fbb7d99511..35104f926d4f9 100644
--- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h
+++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h
@@ -64,28 +64,30 @@ class KaleidoscopeJIT {
 public:
   KaleidoscopeJIT()
       : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()),
-        ObjectLayer(ES,
+        ObjectLayer(AcknowledgeORCv1Deprecation, ES,
                     [this](VModuleKey K) {
                       return LegacyRTDyldObjectLinkingLayer::Resources{
                           std::make_shared<SectionMemoryManager>(),
                           Resolvers[K]};
                     }),
-        CompileLayer(ObjectLayer, SimpleCompiler(*TM)),
-        OptimizeLayer(CompileLayer,
+        CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+                     SimpleCompiler(*TM)),
+        OptimizeLayer(AcknowledgeORCv1Deprecation, CompileLayer,
                       [this](std::unique_ptr<Module> M) {
                         return optimizeModule(std::move(M));
                       }),
         CompileCallbackManager(cantFail(orc::createLocalCompileCallbackManager(
             TM->getTargetTriple(), ES, 0))),
-        CODLayer(ES, OptimizeLayer,
-                 [&](orc::VModuleKey K) { return Resolvers[K]; },
-                 [&](orc::VModuleKey K, std::shared_ptr<SymbolResolver> R) {
-                   Resolvers[K] = std::move(R);
-                 },
-                 [](Function &F) { return std::set<Function *>({&F}); },
-                 *CompileCallbackManager,
-                 orc::createLocalIndirectStubsManagerBuilder(
-                     TM->getTargetTriple())) {
+        CODLayer(
+            AcknowledgeORCv1Deprecation, ES, OptimizeLayer,
+            [&](orc::VModuleKey K) { return Resolvers[K]; },
+            [&](orc::VModuleKey K, std::shared_ptr<SymbolResolver> R) {
+              Resolvers[K] = std::move(R);
+            },
+            [](Function &F) { return std::set<Function *>({&F}); },
+            *CompileCallbackManager,
+            orc::createLocalIndirectStubsManagerBuilder(
+                TM->getTargetTriple())) {
     llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
   }
 
diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h
index 3cf521ce2be00..ee5225672fc42 100644
--- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h
+++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h
@@ -105,13 +105,14 @@ class KaleidoscopeJIT {
             },
             [](Error Err) { cantFail(std::move(Err), "lookupFlags failed"); })),
         TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()),
-        ObjectLayer(ES,
+        ObjectLayer(AcknowledgeORCv1Deprecation, ES,
                     [this](VModuleKey K) {
                       return LegacyRTDyldObjectLinkingLayer::Resources{
                           std::make_shared<SectionMemoryManager>(), Resolver};
                     }),
-        CompileLayer(ObjectLayer, SimpleCompiler(*TM)),
-        OptimizeLayer(CompileLayer,
+        CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+                     SimpleCompiler(*TM)),
+        OptimizeLayer(AcknowledgeORCv1Deprecation, CompileLayer,
                       [this](std::unique_ptr<Module> M) {
                         return optimizeModule(std::move(M));
                       }),
diff --git a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h
index d6c9d12c538c2..5cc64da68cc3e 100644
--- a/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h
+++ b/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h
@@ -113,14 +113,15 @@ class KaleidoscopeJIT {
         TM(EngineBuilder().selectTarget(Triple(Remote.getTargetTriple()), "",
                                         "", SmallVector<std::string, 0>())),
         DL(TM->createDataLayout()),
-        ObjectLayer(ES,
+        ObjectLayer(AcknowledgeORCv1Deprecation, ES,
                     [this](VModuleKey K) {
                       return LegacyRTDyldObjectLinkingLayer::Resources{
                           cantFail(this->Remote.createRemoteMemoryManager()),
                           Resolver};
                     }),
-        CompileLayer(ObjectLayer, SimpleCompiler(*TM)),
-        OptimizeLayer(CompileLayer,
+        CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+                     SimpleCompiler(*TM)),
+        OptimizeLayer(AcknowledgeORCv1Deprecation, CompileLayer,
                       [this](std::unique_ptr<Module> M) {
                         return optimizeModule(std::move(M));
                       }),
diff --git a/llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h
index 162dcfed989c2..a253a973a4cc9 100644
--- a/llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h
+++ b/llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h
@@ -45,17 +45,16 @@ class KaleidoscopeJIT {
   KaleidoscopeJIT()
       : Resolver(createLegacyLookupResolver(
             ES,
-            [this](const std::string &Name) {
-              return findMangledSymbol(Name);
-            },
+            [this](const std::string &Name) { return findMangledSymbol(Name); },
             [](Error Err) { cantFail(std::move(Err), "lookupFlags failed"); })),
         TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()),
-        ObjectLayer(ES,
+        ObjectLayer(AcknowledgeORCv1Deprecation, ES,
                     [this](VModuleKey) {
                       return ObjLayerT::Resources{
                           std::make_shared<SectionMemoryManager>(), Resolver};
                     }),
-        CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {
+        CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+                     SimpleCompiler(*TM)) {
     llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
   }
 
diff --git a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
index 68cc920483b63..4fb6dad963879 100644
--- a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Object/Binary.h"
@@ -634,7 +635,13 @@ class EngineBuilder {
   }
 
   // Use OrcMCJITReplacement instead of MCJIT. Off by default.
-  void setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement) {
+  LLVM_ATTRIBUTE_DEPRECATED(
+      inline void setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement),
+      "ORCv1 utilities (including OrcMCJITReplacement) are deprecated. Please "
+      "use ORCv2/LLJIT instead (see docs/ORCv2.rst)");
+
+  void setUseOrcMCJITReplacement(ORCv1DeprecationAcknowledgement,
+                                 bool UseOrcMCJITReplacement) {
     this->UseOrcMCJITReplacement = UseOrcMCJITReplacement;
   }
 
@@ -658,6 +665,10 @@ class EngineBuilder {
   ExecutionEngine *create(TargetMachine *TM);
 };
 
+void EngineBuilder::setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement) {
+  this->UseOrcMCJITReplacement = UseOrcMCJITReplacement;
+}
+
 // Create wrappers for C Binding types (see CBindingWrapping.h).
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef)
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index ca1ce40cdaef1..5f593a27cad6c 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -264,13 +264,26 @@ class LegacyCompileOnDemandLayer {
       std::function<void(VModuleKey K, std::shared_ptr<SymbolResolver> R)>;
 
   /// Construct a compile-on-demand layer instance.
-  LegacyCompileOnDemandLayer(ExecutionSession &ES, BaseLayerT &BaseLayer,
-                             SymbolResolverGetter GetSymbolResolver,
-                             SymbolResolverSetter SetSymbolResolver,
-                             PartitioningFtor Partition,
-                             CompileCallbackMgrT &CallbackMgr,
-                             IndirectStubsManagerBuilderT CreateIndirectStubsManager,
-                             bool CloneStubsIntoPartitions = true)
+  /// Deprecated: emits a warning; pass AcknowledgeORCv1Deprecation to opt out.
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyCompileOnDemandLayer(
+          ExecutionSession &ES, BaseLayerT &BaseLayer,
+          SymbolResolverGetter GetSymbolResolver,
+          SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+          CompileCallbackMgrT &CallbackMgr,
+          IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+          bool CloneStubsIntoPartitions = true),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use the ORCv2 CompileOnDemandLayer instead");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
+  LegacyCompileOnDemandLayer(
+      ORCv1DeprecationAcknowledgement, ExecutionSession &ES,
+      BaseLayerT &BaseLayer, SymbolResolverGetter GetSymbolResolver,
+      SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+      CompileCallbackMgrT &CallbackMgr,
+      IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+      bool CloneStubsIntoPartitions = true)
       : ES(ES), BaseLayer(BaseLayer),
         GetSymbolResolver(std::move(GetSymbolResolver)),
         SetSymbolResolver(std::move(SetSymbolResolver)),
@@ -729,8 +742,24 @@ class LegacyCompileOnDemandLayer {
   bool CloneStubsIntoPartitions;
 };
 
-} // end namespace orc
+template <typename BaseLayerT, typename CompileCallbackMgrT,
+          typename IndirectStubsMgrT>
+LegacyCompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT, IndirectStubsMgrT>::
+    LegacyCompileOnDemandLayer(
+        ExecutionSession &ES, BaseLayerT &BaseLayer,
+        SymbolResolverGetter GetSymbolResolver,
+        SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition,
+        CompileCallbackMgrT &CallbackMgr,
+        IndirectStubsManagerBuilderT CreateIndirectStubsManager,
+        bool CloneStubsIntoPartitions)
+    : ES(ES), BaseLayer(BaseLayer),
+      GetSymbolResolver(std::move(GetSymbolResolver)),
+      SetSymbolResolver(std::move(SetSymbolResolver)),
+      Partition(std::move(Partition)), CompileCallbackMgr(CallbackMgr),
+      CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)),
+      CloneStubsIntoPartitions(CloneStubsIntoPartitions) {}
 
+} // end namespace orc
 } // end namespace llvm
 
 #endif // LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 016fd829bf983..94a5618233e4a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
+#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index ae3ab8c095f21..75865920c741d 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -97,7 +97,14 @@ class LegacyCtorDtorRunner {
 public:
   /// Construct a CtorDtorRunner for the given range using the given
   ///        name mangling function.
-  LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames, VModuleKey K)
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames,
+                           VModuleKey K),
+      "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. "
+      "Please use the ORCv2 CtorDtorRunner utility instead");
+
+  LegacyCtorDtorRunner(ORCv1DeprecationAcknowledgement,
+                       std::vector<std::string> CtorDtorNames, VModuleKey K)
       : CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
 
   /// Run the recorded constructors/destructors through the given JIT
@@ -128,6 +135,11 @@ class LegacyCtorDtorRunner {
   orc::VModuleKey K;
 };
 
+template <typename JITLayerT>
+LegacyCtorDtorRunner<JITLayerT>::LegacyCtorDtorRunner(
+    std::vector<std::string> CtorDtorNames, VModuleKey K)
+    : CtorDtorNames(std::move(CtorDtorNames)), K(K) {}
+
 class CtorDtorRunner {
 public:
   CtorDtorRunner(JITDylib &JD) : JD(JD) {}
@@ -180,7 +192,14 @@ class LegacyLocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
 public:
   /// Create a runtime-overrides class.
   template <typename MangleFtorT>
-  LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle) {
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle),
+      "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. "
+      "Please use the ORCv2 LocalCXXRuntimeOverrides utility instead");
+
+  template <typename MangleFtorT>
+  LegacyLocalCXXRuntimeOverrides(ORCv1DeprecationAcknowledgement,
+                                 const MangleFtorT &Mangle) {
     addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
     addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
   }
@@ -201,6 +220,13 @@ class LegacyLocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
   StringMap<JITTargetAddress> CXXRuntimeOverrides;
 };
 
+template <typename MangleFtorT>
+LegacyLocalCXXRuntimeOverrides::LegacyLocalCXXRuntimeOverrides(
+    const MangleFtorT &Mangle) {
+  addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride));
+  addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride));
+}
+
 class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase {
 public:
   Error enable(JITDylib &JD, MangleAndInterner &Mangler);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index ecff09b98f873..52223a83ad420 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -63,8 +63,18 @@ class LegacyIRCompileLayer {
 
   /// Construct an LegacyIRCompileLayer with the given BaseLayer, which must
   ///        implement the ObjectLayer concept.
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyIRCompileLayer(
+          BaseLayerT &BaseLayer, CompileFtor Compile,
+          NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback()),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "the ORCv2 IRCompileLayer instead");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
   LegacyIRCompileLayer(
-      BaseLayerT &BaseLayer, CompileFtor Compile,
+      ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer,
+      CompileFtor Compile,
       NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback())
       : BaseLayer(BaseLayer), Compile(std::move(Compile)),
         NotifyCompiled(std::move(NotifyCompiled)) {}
@@ -122,8 +132,14 @@ class LegacyIRCompileLayer {
   NotifyCompiledCallback NotifyCompiled;
 };
 
-} // end namespace orc
+template <typename BaseLayerT, typename CompileFtor>
+LegacyIRCompileLayer<BaseLayerT, CompileFtor>::LegacyIRCompileLayer(
+    BaseLayerT &BaseLayer, CompileFtor Compile,
+    NotifyCompiledCallback NotifyCompiled)
+    : BaseLayer(BaseLayer), Compile(std::move(Compile)),
+      NotifyCompiled(std::move(NotifyCompiled)) {}
 
+} // end namespace orc
 } // end namespace llvm
 
 #endif // LLVM_EXECUTIONENGINE_ORC_IRCOMPILINGLAYER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 8890a572f65a8..1b4c8b6cd95fe 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -56,9 +56,17 @@ class LegacyIRTransformLayer {
 public:
 
   /// Construct an LegacyIRTransformLayer with the given BaseLayer
-  LegacyIRTransformLayer(BaseLayerT &BaseLayer,
-                   TransformFtor Transform = TransformFtor())
-    : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyIRTransformLayer(BaseLayerT &BaseLayer,
+                             TransformFtor Transform = TransformFtor()),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "the ORCv2 IRTransformLayer instead");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
+  LegacyIRTransformLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer,
+                         TransformFtor Transform = TransformFtor())
+      : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
 
   /// Apply the transform functor to the module, then add the module to
   ///        the layer below, along with the memory manager and symbol resolver.
@@ -108,6 +116,11 @@ class LegacyIRTransformLayer {
   TransformFtor Transform;
 };
 
+template <typename BaseLayerT, typename TransformFtor>
+LegacyIRTransformLayer<BaseLayerT, TransformFtor>::LegacyIRTransformLayer(
+    BaseLayerT &BaseLayer, TransformFtor Transform)
+    : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
index 92efdeded3625..855e31b335498 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
@@ -24,7 +24,15 @@ namespace orc {
 template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
 class LambdaResolver : public LegacyJITSymbolResolver {
 public:
-  LambdaResolver(DylibLookupFtorT DylibLookupFtor,
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LambdaResolver(DylibLookupFtorT DylibLookupFtor,
+                     ExternalLookupFtorT ExternalLookupFtor),
+      "ORCv1 utilities (including resolvers) are deprecated and will be "
+      "removed "
+      "in the next release. Please use ORCv2 (see docs/ORCv2.rst)");
+
+  LambdaResolver(ORCv1DeprecationAcknowledgement,
+                 DylibLookupFtorT DylibLookupFtor,
                  ExternalLookupFtorT ExternalLookupFtor)
       : DylibLookupFtor(DylibLookupFtor),
         ExternalLookupFtor(ExternalLookupFtor) {}
@@ -42,6 +50,12 @@ class LambdaResolver : public LegacyJITSymbolResolver {
   ExternalLookupFtorT ExternalLookupFtor;
 };
 
+template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
+LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>::LambdaResolver(
+    DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor)
+    : DylibLookupFtor(DylibLookupFtor), ExternalLookupFtor(ExternalLookupFtor) {
+}
+
 template <typename DylibLookupFtorT,
           typename ExternalLookupFtorT>
 std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
@@ -52,6 +66,17 @@ createLambdaResolver(DylibLookupFtorT DylibLookupFtor,
                          std::move(ExternalLookupFtor));
 }
 
+template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
+std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>>
+createLambdaResolver(ORCv1DeprecationAcknowledgement,
+                     DylibLookupFtorT DylibLookupFtor,
+                     ExternalLookupFtorT ExternalLookupFtor) {
+  using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>;
+  return make_unique<LR>(AcknowledgeORCv1Deprecation,
+                         std::move(DylibLookupFtor),
+                         std::move(ExternalLookupFtor));
+}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
index e5c5feb228126..16202d89f8612 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
@@ -196,7 +196,14 @@ template <typename BaseLayerT> class LazyEmittingLayer {
 public:
 
   /// Construct a lazy emitting layer.
-  LazyEmittingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LazyEmittingLayer(BaseLayerT &BaseLayer),
+      "ORCv1 layers (including LazyEmittingLayer) are deprecated. Please use "
+      "ORCv2, where lazy emission is the default");
+
+  /// Construct a lazy emitting layer.
+  LazyEmittingLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer)
+      : BaseLayer(BaseLayer) {}
 
   /// Add the given module to the lazy emitting layer.
   Error addModule(VModuleKey K, std::unique_ptr<Module> M) {
@@ -254,6 +261,10 @@ template <typename BaseLayerT> class LazyEmittingLayer {
   }
 };
 
+template <typename BaseLayerT>
+LazyEmittingLayer<BaseLayerT>::LazyEmittingLayer(BaseLayerT &BaseLayer)
+    : BaseLayer(BaseLayer) {}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index de4603f98f83e..eac1cc3e097a6 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -48,7 +48,16 @@ template <typename BaseLayerT, typename TransformFtor>
 class LegacyObjectTransformLayer {
 public:
   /// Construct an ObjectTransformLayer with the given BaseLayer
-  LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyObjectTransformLayer(BaseLayerT &BaseLayer,
+                                 TransformFtor Transform = TransformFtor()),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "the ORCv2 ObjectTransformLayer instead");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
+  LegacyObjectTransformLayer(ORCv1DeprecationAcknowledgement,
+                             BaseLayerT &BaseLayer,
                              TransformFtor Transform = TransformFtor())
       : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
 
@@ -107,6 +116,11 @@ class LegacyObjectTransformLayer {
   TransformFtor Transform;
 };
 
+template <typename BaseLayerT, typename TransformFtor>
+LegacyObjectTransformLayer<BaseLayerT, TransformFtor>::
+    LegacyObjectTransformLayer(BaseLayerT &BaseLayer, TransformFtor Transform)
+    : BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
index 479658b11e97c..d9535ce5f21ff 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h
@@ -353,17 +353,27 @@ class LegacyRTDyldObjectLinkingLayer : public LegacyRTDyldObjectLinkingLayerBase
 
   /// Construct an ObjectLinkingLayer with the given NotifyLoaded,
   ///        and NotifyFinalized functors.
+  LLVM_ATTRIBUTE_DEPRECATED(
+      LegacyRTDyldObjectLinkingLayer(
+          ExecutionSession &ES, ResourcesGetter GetResources,
+          NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
+          NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
+          NotifyFreedFtor NotifyFreed = NotifyFreedFtor()),
+      "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please "
+      "use "
+      "ORCv2 (see docs/ORCv2.rst)");
+
+  /// Legacy layer constructor with deprecation acknowledgement.
   LegacyRTDyldObjectLinkingLayer(
-      ExecutionSession &ES, ResourcesGetter GetResources,
+      ORCv1DeprecationAcknowledgement, ExecutionSession &ES,
+      ResourcesGetter GetResources,
       NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(),
       NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(),
       NotifyFreedFtor NotifyFreed = NotifyFreedFtor())
       : ES(ES), GetResources(std::move(GetResources)),
         NotifyLoaded(std::move(NotifyLoaded)),
         NotifyFinalized(std::move(NotifyFinalized)),
-        NotifyFreed(std::move(NotifyFreed)),
-        ProcessAllSections(false) {
-  }
+        NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {}
 
   /// Set the 'ProcessAllSections' flag.
   ///
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
index 9550edceb1008..b87cf697a81e5 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h
@@ -13,9 +13,10 @@
 #ifndef LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
 #define LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H
 
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
 #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
 #include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
 #include <map>
 
 namespace llvm {
@@ -312,7 +313,14 @@ class RemoteObjectClientLayer : public RemoteObjectLayer<RPCEndpoint> {
   ///
   /// The ReportError functor can be used locally log errors that are intended
   /// to be sent  sent
-  RemoteObjectClientLayer(RPCEndpoint &Remote,
+  LLVM_ATTRIBUTE_DEPRECATED(
+      RemoteObjectClientLayer(RPCEndpoint &Remote,
+                              std::function<void(Error)> ReportError),
+      "ORCv1 layers (including RemoteObjectClientLayer) are deprecated. Please "
+      "use "
+      "ORCv2 (see docs/ORCv2.rst)");
+
+  RemoteObjectClientLayer(ORCv1DeprecationAcknowledgement, RPCEndpoint &Remote,
                           std::function<void(Error)> ReportError)
       : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) {
     using ThisT = RemoteObjectClientLayer<RPCEndpoint>;
@@ -417,11 +425,18 @@ class RemoteObjectServerLayer : public RemoteObjectLayer<RPCEndpoint> {
 
   /// Create a RemoteObjectServerLayer with the given base layer (which must be
   /// an object layer), RPC endpoint, and error reporter function.
-  RemoteObjectServerLayer(BaseLayerT &BaseLayer,
-                          RPCEndpoint &Remote,
+  LLVM_ATTRIBUTE_DEPRECATED(
+      RemoteObjectServerLayer(BaseLayerT &BaseLayer, RPCEndpoint &Remote,
+                              std::function<void(Error)> ReportError),
+      "ORCv1 layers (including RemoteObjectServerLayer) are deprecated. Please "
+      "use "
+      "ORCv2 (see docs/ORCv2.rst)");
+
+  RemoteObjectServerLayer(ORCv1DeprecationAcknowledgement,
+                          BaseLayerT &BaseLayer, RPCEndpoint &Remote,
                           std::function<void(Error)> ReportError)
-    : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
-      BaseLayer(BaseLayer), HandleIdMgr(1) {
+      : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
+        BaseLayer(BaseLayer), HandleIdMgr(1) {
     using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>;
 
     Remote.template addHandler<AddObject>(*this, &ThisT::addObject);
@@ -462,6 +477,7 @@ class RemoteObjectServerLayer : public RemoteObjectLayer<RPCEndpoint> {
     assert(!BaseLayerHandles.count(Id) && "Id already in use?");
 
     auto Resolver = createLambdaResolver(
+        AcknowledgeORCv1Deprecation,
         [this, Id](const std::string &Name) { return lookup(Id, Name); },
         [this, Id](const std::string &Name) {
           return lookupInLogicalDylib(Id, Name);
@@ -522,6 +538,31 @@ class RemoteObjectServerLayer : public RemoteObjectLayer<RPCEndpoint> {
   std::map<ObjHandleT, typename BaseLayerT::ObjHandleT> BaseLayerHandles;
 };
 
+template <typename RPCEndpoint>
+RemoteObjectClientLayer<RPCEndpoint>::RemoteObjectClientLayer(
+    RPCEndpoint &Remote, std::function<void(Error)> ReportError)
+    : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) {
+  using ThisT = RemoteObjectClientLayer<RPCEndpoint>;
+  Remote.template addHandler<Lookup>(*this, &ThisT::lookup);
+  Remote.template addHandler<LookupInLogicalDylib>(
+      *this, &ThisT::lookupInLogicalDylib);
+}
+
+template <typename BaseLayerT, typename RPCEndpoint>
+RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>::RemoteObjectServerLayer(
+    BaseLayerT &BaseLayer, RPCEndpoint &Remote,
+    std::function<void(Error)> ReportError)
+    : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)),
+      BaseLayer(BaseLayer), HandleIdMgr(1) {
+  using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>;
+
+  Remote.template addHandler<AddObject>(*this, &ThisT::addObject);
+  Remote.template addHandler<RemoveObject>(*this, &ThisT::removeObject);
+  Remote.template addHandler<FindSymbol>(*this, &ThisT::findSymbol);
+  Remote.template addHandler<FindSymbolIn>(*this, &ThisT::findSymbolIn);
+  Remote.template addHandler<EmitAndFinalize>(*this, &ThisT::emitAndFinalize);
+}
+
 } // end namespace orc
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h b/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h
new file mode 100644
index 0000000000000..7ed254b3ee04f
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h
@@ -0,0 +1,22 @@
+//===------ OrcV1Deprecation.h - ORCv1 deprecation tag ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Tag for suppressing ORCv1 deprecation warnings.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+#define LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+
+namespace llvm {
+
+enum ORCv1DeprecationAcknowledgement { AcknowledgeORCv1Deprecation };
+
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index 71f05b0e8063d..98129e1690d27 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -211,28 +211,31 @@ class OrcCBindingsStack {
                     IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
       : CCMgr(createCompileCallbackManager(TM, ES)), DL(TM.createDataLayout()),
         IndirectStubsMgr(IndirectStubsMgrBuilder()),
-        ObjectLayer(ES,
-                    [this](orc::VModuleKey K) {
-                      auto ResolverI = Resolvers.find(K);
-                      assert(ResolverI != Resolvers.end() &&
-                             "No resolver for module K");
-                      auto Resolver = std::move(ResolverI->second);
-                      Resolvers.erase(ResolverI);
-                      return ObjLayerT::Resources{
-                          std::make_shared<SectionMemoryManager>(), Resolver};
-                    },
-                    nullptr,
-                    [this](orc::VModuleKey K, const object::ObjectFile &Obj,
-                           const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
-		      this->notifyFinalized(K, Obj, LoadedObjInfo);
-                    },
-                    [this](orc::VModuleKey K, const object::ObjectFile &Obj) {
-		      this->notifyFreed(K, Obj);
-                    }),
-        CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
+        ObjectLayer(
+            AcknowledgeORCv1Deprecation, ES,
+            [this](orc::VModuleKey K) {
+              auto ResolverI = Resolvers.find(K);
+              assert(ResolverI != Resolvers.end() &&
+                     "No resolver for module K");
+              auto Resolver = std::move(ResolverI->second);
+              Resolvers.erase(ResolverI);
+              return ObjLayerT::Resources{
+                  std::make_shared<SectionMemoryManager>(), Resolver};
+            },
+            nullptr,
+            [this](orc::VModuleKey K, const object::ObjectFile &Obj,
+                   const RuntimeDyld::LoadedObjectInfo &LoadedObjInfo) {
+              this->notifyFinalized(K, Obj, LoadedObjInfo);
+            },
+            [this](orc::VModuleKey K, const object::ObjectFile &Obj) {
+              this->notifyFreed(K, Obj);
+            }),
+        CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+                     orc::SimpleCompiler(TM)),
         CODLayer(createCODLayer(ES, CompileLayer, CCMgr.get(),
                                 std::move(IndirectStubsMgrBuilder), Resolvers)),
         CXXRuntimeOverrides(
+            AcknowledgeORCv1Deprecation,
             [this](const std::string &S) { return mangle(S); }) {}
 
   Error shutdown() {
@@ -308,7 +311,8 @@ class OrcCBindingsStack {
 
     // Run the static constructors, and save the static destructor runner for
     // execution when the JIT is torn down.
-    orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), K);
+    orc::LegacyCtorDtorRunner<OrcCBindingsStack> CtorRunner(
+        AcknowledgeORCv1Deprecation, std::move(CtorNames), K);
     if (auto Err = CtorRunner.runViaLayer(*this))
       return std::move(Err);
 
@@ -465,7 +469,7 @@ class OrcCBindingsStack {
       return nullptr;
 
     return llvm::make_unique<CODLayerT>(
-        ES, CompileLayer,
+        AcknowledgeORCv1Deprecation, ES, CompileLayer,
         [&Resolvers](orc::VModuleKey K) {
           auto ResolverI = Resolvers.find(K);
           assert(ResolverI != Resolvers.end() && "No resolver for module K");
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp b/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
index 7c56ecc36bedd..772a9c2c4ab2f 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.cpp
@@ -127,7 +127,8 @@ void OrcMCJITReplacement::runStaticConstructorsDestructors(bool isDtors) {
   auto &CtorDtorsMap = isDtors ? UnexecutedDestructors : UnexecutedConstructors;
 
   for (auto &KV : CtorDtorsMap)
-    cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(std::move(KV.second), KV.first)
+    cantFail(LegacyCtorDtorRunner<LazyEmitLayerT>(
+                 AcknowledgeORCv1Deprecation, std::move(KV.second), KV.first)
                  .runViaLayer(LazyEmitLayer));
 
   CtorDtorsMap.clear();
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 5585008bc8e99..169dc8f1d02b8 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -232,24 +232,24 @@ class OrcMCJITReplacement : public ExecutionEngine {
   OrcMCJITReplacement(std::shared_ptr<MCJITMemoryManager> MemMgr,
                       std::shared_ptr<LegacyJITSymbolResolver> ClientResolver,
                       std::unique_ptr<TargetMachine> TM)
-      : ExecutionEngine(TM->createDataLayout()),
-        TM(std::move(TM)),
+      : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
         MemMgr(
             std::make_shared<MCJITReplacementMemMgr>(*this, std::move(MemMgr))),
         Resolver(std::make_shared<LinkingORCResolver>(*this)),
         ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this),
         NotifyFinalized(*this),
         ObjectLayer(
-            ES,
+            AcknowledgeORCv1Deprecation, ES,
             [this](VModuleKey K) {
               return ObjectLayerT::Resources{this->MemMgr, this->Resolver};
             },
             NotifyObjectLoaded, NotifyFinalized),
-        CompileLayer(ObjectLayer, SimpleCompiler(*this->TM),
+        CompileLayer(AcknowledgeORCv1Deprecation, ObjectLayer,
+                     SimpleCompiler(*this->TM),
                      [this](VModuleKey K, std::unique_ptr<Module> M) {
                        Modules.push_back(std::move(M));
                      }),
-        LazyEmitLayer(CompileLayer) {}
+        LazyEmitLayer(AcknowledgeORCv1Deprecation, CompileLayer) {}
 
   static void Register() {
     OrcMCJITReplacementCtor = createOrcMCJITReplacement;
diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 054a2a742904e..b22ecd5f80a13 100644
--- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -207,5 +207,14 @@ void RTDyldObjectLinkingLayer::onObjEmit(
     NotifyEmitted(K, std::move(ObjBuffer));
 }
 
+LegacyRTDyldObjectLinkingLayer::LegacyRTDyldObjectLinkingLayer(
+    ExecutionSession &ES, ResourcesGetter GetResources,
+    NotifyLoadedFtor NotifyLoaded, NotifyFinalizedFtor NotifyFinalized,
+    NotifyFreedFtor NotifyFreed)
+    : ES(ES), GetResources(std::move(GetResources)),
+      NotifyLoaded(std::move(NotifyLoaded)),
+      NotifyFinalized(std::move(NotifyFinalized)),
+      NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {}
+
 } // End namespace orc.
 } // End namespace llvm.
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index fb6cf6ff91593..8c8cd88c97112 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -83,18 +83,15 @@ namespace {
                                  cl::desc("Force interpretation: disable JIT"),
                                  cl::init(false));
 
-  cl::opt<JITKind> UseJITKind("jit-kind",
-                              cl::desc("Choose underlying JIT kind."),
-                              cl::init(JITKind::MCJIT),
-                              cl::values(
-                                clEnumValN(JITKind::MCJIT, "mcjit",
-                                           "MCJIT"),
-                                clEnumValN(JITKind::OrcMCJITReplacement,
-                                           "orc-mcjit",
-                                           "Orc-based MCJIT replacement"),
-                                clEnumValN(JITKind::OrcLazy,
-                                           "orc-lazy",
-                                           "Orc-based lazy JIT.")));
+  cl::opt<JITKind> UseJITKind(
+      "jit-kind", cl::desc("Choose underlying JIT kind."),
+      cl::init(JITKind::MCJIT),
+      cl::values(clEnumValN(JITKind::MCJIT, "mcjit", "MCJIT"),
+                 clEnumValN(JITKind::OrcMCJITReplacement, "orc-mcjit",
+                            "Orc-based MCJIT replacement "
+                            "(deprecated)"),
+                 clEnumValN(JITKind::OrcLazy, "orc-lazy",
+                            "Orc-based lazy JIT.")));
 
   cl::opt<unsigned>
   LazyJITCompileThreads("compile-threads",
@@ -419,7 +416,8 @@ int main(int argc, char **argv, char * const *envp) {
   builder.setEngineKind(ForceInterpreter
                         ? EngineKind::Interpreter
                         : EngineKind::JIT);
-  builder.setUseOrcMCJITReplacement(UseJITKind == JITKind::OrcMCJITReplacement);
+  builder.setUseOrcMCJITReplacement(AcknowledgeORCv1Deprecation,
+                                    UseJITKind == JITKind::OrcMCJITReplacement);
 
   // If we are supposed to override the target triple, do so now.
   if (!TargetTriple.empty())
@@ -665,6 +663,7 @@ int main(int argc, char **argv, char * const *envp) {
     // Forward MCJIT's symbol resolution calls to the remote.
     static_cast<ForwardingMemoryManager *>(RTDyldMM)->setResolver(
         orc::createLambdaResolver(
+            AcknowledgeORCv1Deprecation,
             [](const std::string &Name) { return nullptr; },
             [&](const std::string &Name) {
               if (auto Addr = ExitOnErr(R->getSymbolAddress(Name)))
diff --git a/llvm/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
index 88d12cd103a92..cc67aa07b802a 100644
--- a/llvm/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/LazyEmittingLayerTest.cpp
@@ -22,7 +22,8 @@ struct MockBaseLayer {
 
 TEST(LazyEmittingLayerTest, Empty) {
   MockBaseLayer M;
-  llvm::orc::LazyEmittingLayer<MockBaseLayer> L(M);
+  llvm::orc::LazyEmittingLayer<MockBaseLayer> L(
+      llvm::AcknowledgeORCv1Deprecation, M);
   cantFail(
       L.addModule(llvm::orc::VModuleKey(), std::unique_ptr<llvm::Module>()));
 }
diff --git a/llvm/unittests/ExecutionEngine/Orc/LegacyCompileOnDemandLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LegacyCompileOnDemandLayerTest.cpp
index 9a526071c84c4..06b96f9fd38ef 100644
--- a/llvm/unittests/ExecutionEngine/Orc/LegacyCompileOnDemandLayerTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/LegacyCompileOnDemandLayerTest.cpp
@@ -76,7 +76,7 @@ TEST(LegacyCompileOnDemandLayerTest, FindSymbol) {
   };
 
   llvm::orc::LegacyCompileOnDemandLayer<decltype(TestBaseLayer)> COD(
-      ES, TestBaseLayer, GetResolver, SetResolver,
+      AcknowledgeORCv1Deprecation, ES, TestBaseLayer, GetResolver, SetResolver,
       [](Function &F) { return std::set<Function *>{&F}; }, CallbackMgr,
       [] { return llvm::make_unique<DummyStubsManager>(); }, true);
 
diff --git a/llvm/unittests/ExecutionEngine/Orc/LegacyRTDyldObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LegacyRTDyldObjectLinkingLayerTest.cpp
index 04b6f2101b256..001019daa4b37 100644
--- a/llvm/unittests/ExecutionEngine/Orc/LegacyRTDyldObjectLinkingLayerTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/LegacyRTDyldObjectLinkingLayerTest.cpp
@@ -68,10 +68,11 @@ TEST(LegacyRTDyldObjectLinkingLayerTest, TestSetProcessAllSections) {
 
   ExecutionSession ES;
 
-  LegacyRTDyldObjectLinkingLayer ObjLayer(ES, [&MM](VModuleKey) {
-    return LegacyRTDyldObjectLinkingLayer::Resources{
-        MM, std::make_shared<NullResolver>()};
-  });
+  LegacyRTDyldObjectLinkingLayer ObjLayer(
+      AcknowledgeORCv1Deprecation, ES, [&MM](VModuleKey) {
+        return LegacyRTDyldObjectLinkingLayer::Resources{
+            MM, std::make_shared<NullResolver>()};
+      });
 
   LLVMContext Context;
   auto M = llvm::make_unique<Module>("", Context);
@@ -130,13 +131,14 @@ TEST_F(LegacyRTDyldObjectLinkingLayerExecutionTest, NoDuplicateFinalization) {
 
   std::map<orc::VModuleKey, std::shared_ptr<orc::SymbolResolver>> Resolvers;
 
-  LegacyRTDyldObjectLinkingLayer ObjLayer(ES, [&](VModuleKey K) {
-    auto I = Resolvers.find(K);
-    assert(I != Resolvers.end() && "Missing resolver");
-    auto R = std::move(I->second);
-    Resolvers.erase(I);
-    return LegacyRTDyldObjectLinkingLayer::Resources{MM, std::move(R)};
-  });
+  LegacyRTDyldObjectLinkingLayer ObjLayer(
+      AcknowledgeORCv1Deprecation, ES, [&](VModuleKey K) {
+        auto I = Resolvers.find(K);
+        assert(I != Resolvers.end() && "Missing resolver");
+        auto R = std::move(I->second);
+        Resolvers.erase(I);
+        return LegacyRTDyldObjectLinkingLayer::Resources{MM, std::move(R)};
+      });
   SimpleCompiler Compile(*TM);
 
   // Create a pair of modules that will trigger recursive finalization:
@@ -217,10 +219,11 @@ TEST_F(LegacyRTDyldObjectLinkingLayerExecutionTest, NoPrematureAllocation) {
 
   auto MM = std::make_shared<SectionMemoryManagerWrapper>();
 
-  LegacyRTDyldObjectLinkingLayer ObjLayer(ES, [&MM](VModuleKey K) {
-    return LegacyRTDyldObjectLinkingLayer::Resources{
-        MM, std::make_shared<NullResolver>()};
-  });
+  LegacyRTDyldObjectLinkingLayer ObjLayer(
+      AcknowledgeORCv1Deprecation, ES, [&MM](VModuleKey K) {
+        return LegacyRTDyldObjectLinkingLayer::Resources{
+            MM, std::make_shared<NullResolver>()};
+      });
   SimpleCompiler Compile(*TM);
 
   // Create a pair of unrelated modules:
@@ -278,7 +281,7 @@ TEST_F(LegacyRTDyldObjectLinkingLayerExecutionTest, NoPrematureAllocation) {
 TEST_F(LegacyRTDyldObjectLinkingLayerExecutionTest, TestNotifyLoadedSignature) {
   ExecutionSession ES;
   LegacyRTDyldObjectLinkingLayer ObjLayer(
-      ES,
+      AcknowledgeORCv1Deprecation, ES,
       [](VModuleKey) {
         return LegacyRTDyldObjectLinkingLayer::Resources{
             nullptr, std::make_shared<NullResolver>()};
diff --git a/llvm/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp
index 2b6b5a7670d4b..2ff7e91a73234 100644
--- a/llvm/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/ObjectTransformLayerTest.cpp
@@ -182,17 +182,19 @@ TEST(LegacyObjectTransformLayerTest, Main) {
 
   // Create one object transform layer using a transform (as a functor)
   // that allocates new objects, and deals in unique pointers.
-  LegacyObjectTransformLayer<MockBaseLayer, AllocatingTransform> T1(M);
+  LegacyObjectTransformLayer<MockBaseLayer, AllocatingTransform> T1(
+      llvm::AcknowledgeORCv1Deprecation, M);
 
   // Create a second object transform layer using a transform (as a lambda)
   // that mutates objects in place, and deals in naked pointers
   LegacyObjectTransformLayer<MockBaseLayer,
-                         std::function<std::shared_ptr<MockObjectFile>(
-                           std::shared_ptr<MockObjectFile>)>>
-    T2(M, [](std::shared_ptr<MockObjectFile> Obj) {
-    ++(*Obj);
-    return Obj;
-  });
+                             std::function<std::shared_ptr<MockObjectFile>(
+                                 std::shared_ptr<MockObjectFile>)>>
+      T2(llvm::AcknowledgeORCv1Deprecation, M,
+         [](std::shared_ptr<MockObjectFile> Obj) {
+           ++(*Obj);
+           return Obj;
+         });
 
   // Test addObject with T1 (allocating)
   auto K1 = ES.allocateVModule();
@@ -281,22 +283,25 @@ TEST(LegacyObjectTransformLayerTest, Main) {
   };
 
   // Construct the jit layers.
-  LegacyRTDyldObjectLinkingLayer BaseLayer(ES, [](VModuleKey) {
-    return LegacyRTDyldObjectLinkingLayer::Resources{
-        std::make_shared<llvm::SectionMemoryManager>(),
-        std::make_shared<NullResolver>()};
-  });
+  LegacyRTDyldObjectLinkingLayer BaseLayer(
+      llvm::AcknowledgeORCv1Deprecation, ES, [](VModuleKey) {
+        return LegacyRTDyldObjectLinkingLayer::Resources{
+            std::make_shared<llvm::SectionMemoryManager>(),
+            std::make_shared<NullResolver>()};
+      });
 
   auto IdentityTransform = [](std::unique_ptr<llvm::MemoryBuffer> Obj) {
     return Obj;
   };
   LegacyObjectTransformLayer<decltype(BaseLayer), decltype(IdentityTransform)>
-      TransformLayer(BaseLayer, IdentityTransform);
+      TransformLayer(llvm::AcknowledgeORCv1Deprecation, BaseLayer,
+                     IdentityTransform);
   auto NullCompiler = [](llvm::Module &) {
     return std::unique_ptr<llvm::MemoryBuffer>(nullptr);
   };
   LegacyIRCompileLayer<decltype(TransformLayer), decltype(NullCompiler)>
-    CompileLayer(TransformLayer, NullCompiler);
+      CompileLayer(llvm::AcknowledgeORCv1Deprecation, TransformLayer,
+                   NullCompiler);
 
   // Make sure that the calls from LegacyIRCompileLayer to LegacyObjectTransformLayer
   // compile.
diff --git a/llvm/unittests/ExecutionEngine/Orc/RemoteObjectLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/RemoteObjectLayerTest.cpp
index cb02a91cb6c2b..4377d5267796e 100644
--- a/llvm/unittests/ExecutionEngine/Orc/RemoteObjectLayerTest.cpp
+++ b/llvm/unittests/ExecutionEngine/Orc/RemoteObjectLayerTest.cpp
@@ -127,7 +127,8 @@ TEST(RemoteObjectLayer, AddObject) {
   std::copy(ObjBytes.begin(), ObjBytes.end(), ObjContents.begin());
 
   RPCEndpoint ClientEP(*Channels.first, true);
-  RemoteObjectClientLayer<RPCEndpoint> Client(ClientEP, ReportError);
+  RemoteObjectClientLayer<RPCEndpoint> Client(AcknowledgeORCv1Deprecation,
+                                              ClientEP, ReportError);
 
   RPCEndpoint ServerEP(*Channels.second, true);
   MockObjectLayer BaseLayer(
@@ -144,9 +145,8 @@ TEST(RemoteObjectLayer, AddObject) {
 
       return 1;
     });
-  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(BaseLayer,
-                                                               ServerEP,
-                                                               ReportError);
+  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(
+      AcknowledgeORCv1Deprecation, BaseLayer, ServerEP, ReportError);
 
   bool Finished = false;
   ServerEP.addHandler<remote::utils::TerminateSession>(
@@ -181,7 +181,8 @@ TEST(RemoteObjectLayer, AddObjectFailure) {
     };
 
   RPCEndpoint ClientEP(*Channels.first, true);
-  RemoteObjectClientLayer<RPCEndpoint> Client(ClientEP, ReportError);
+  RemoteObjectClientLayer<RPCEndpoint> Client(AcknowledgeORCv1Deprecation,
+                                              ClientEP, ReportError);
 
   RPCEndpoint ServerEP(*Channels.second, true);
   MockObjectLayer BaseLayer(
@@ -191,9 +192,8 @@ TEST(RemoteObjectLayer, AddObjectFailure) {
       return make_error<StringError>("AddObjectFailure - Test Message",
                                      inconvertibleErrorCode());
     });
-  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(BaseLayer,
-                                                               ServerEP,
-                                                               ReportError);
+  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(
+      AcknowledgeORCv1Deprecation, BaseLayer, ServerEP, ReportError);
 
   bool Finished = false;
   ServerEP.addHandler<remote::utils::TerminateSession>(
@@ -233,7 +233,8 @@ TEST(RemoteObjectLayer, RemoveObject) {
   };
 
   RPCEndpoint ClientEP(*Channels.first, true);
-  RemoteObjectClientLayer<RPCEndpoint> Client(ClientEP, ReportError);
+  RemoteObjectClientLayer<RPCEndpoint> Client(AcknowledgeORCv1Deprecation,
+                                              ClientEP, ReportError);
 
   RPCEndpoint ServerEP(*Channels.second, true);
 
@@ -243,9 +244,8 @@ TEST(RemoteObjectLayer, RemoveObject) {
       SymTab[1] = MockObjectLayer::LookupFn();
       return 1;
     });
-  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(BaseLayer,
-                                                               ServerEP,
-                                                               ReportError);
+  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(
+      AcknowledgeORCv1Deprecation, BaseLayer, ServerEP, ReportError);
 
   bool Finished = false;
   ServerEP.addHandler<remote::utils::TerminateSession>(
@@ -283,7 +283,8 @@ TEST(RemoteObjectLayer, RemoveObjectFailure) {
     };
 
   RPCEndpoint ClientEP(*Channels.first, true);
-  RemoteObjectClientLayer<RPCEndpoint> Client(ClientEP, ReportError);
+  RemoteObjectClientLayer<RPCEndpoint> Client(AcknowledgeORCv1Deprecation,
+                                              ClientEP, ReportError);
 
   RPCEndpoint ServerEP(*Channels.second, true);
 
@@ -294,9 +295,8 @@ TEST(RemoteObjectLayer, RemoveObjectFailure) {
        MockObjectLayer::SymbolLookupTable &SymTab) {
       return 42;
     });
-  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(BaseLayer,
-                                                               ServerEP,
-                                                               ReportError);
+  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(
+      AcknowledgeORCv1Deprecation, BaseLayer, ServerEP, ReportError);
 
   bool Finished = false;
   ServerEP.addHandler<remote::utils::TerminateSession>(
@@ -339,7 +339,8 @@ TEST(RemoteObjectLayer, FindSymbol) {
     };
 
   RPCEndpoint ClientEP(*Channels.first, true);
-  RemoteObjectClientLayer<RPCEndpoint> Client(ClientEP, ReportError);
+  RemoteObjectClientLayer<RPCEndpoint> Client(AcknowledgeORCv1Deprecation,
+                                              ClientEP, ReportError);
 
   RPCEndpoint ServerEP(*Channels.second, true);
 
@@ -358,9 +359,8 @@ TEST(RemoteObjectLayer, FindSymbol) {
         };
       return 42;
     });
-  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(BaseLayer,
-                                                               ServerEP,
-                                                               ReportError);
+  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(
+      AcknowledgeORCv1Deprecation, BaseLayer, ServerEP, ReportError);
 
   bool Finished = false;
   ServerEP.addHandler<remote::utils::TerminateSession>(
@@ -421,7 +421,8 @@ TEST(RemoteObjectLayer, FindSymbolIn) {
     };
 
   RPCEndpoint ClientEP(*Channels.first, true);
-  RemoteObjectClientLayer<RPCEndpoint> Client(ClientEP, ReportError);
+  RemoteObjectClientLayer<RPCEndpoint> Client(AcknowledgeORCv1Deprecation,
+                                              ClientEP, ReportError);
 
   RPCEndpoint ServerEP(*Channels.second, true);
 
@@ -447,9 +448,8 @@ TEST(RemoteObjectLayer, FindSymbolIn) {
 
       return 42;
     });
-  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(BaseLayer,
-                                                               ServerEP,
-                                                               ReportError);
+  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(
+      AcknowledgeORCv1Deprecation, BaseLayer, ServerEP, ReportError);
 
   bool Finished = false;
   ServerEP.addHandler<remote::utils::TerminateSession>(
@@ -496,7 +496,8 @@ TEST(RemoteObjectLayer, EmitAndFinalize) {
   };
 
   RPCEndpoint ClientEP(*Channels.first, true);
-  RemoteObjectClientLayer<RPCEndpoint> Client(ClientEP, ReportError);
+  RemoteObjectClientLayer<RPCEndpoint> Client(AcknowledgeORCv1Deprecation,
+                                              ClientEP, ReportError);
 
   RPCEndpoint ServerEP(*Channels.second, true);
 
@@ -506,9 +507,8 @@ TEST(RemoteObjectLayer, EmitAndFinalize) {
       SymTab[1] = MockObjectLayer::LookupFn();
       return 1;
     });
-  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(BaseLayer,
-                                                               ServerEP,
-                                                               ReportError);
+  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(
+      AcknowledgeORCv1Deprecation, BaseLayer, ServerEP, ReportError);
 
   bool Finished = false;
   ServerEP.addHandler<remote::utils::TerminateSession>(
@@ -547,7 +547,8 @@ TEST(RemoteObjectLayer, EmitAndFinalizeFailure) {
     };
 
   RPCEndpoint ClientEP(*Channels.first, true);
-  RemoteObjectClientLayer<RPCEndpoint> Client(ClientEP, ReportError);
+  RemoteObjectClientLayer<RPCEndpoint> Client(AcknowledgeORCv1Deprecation,
+                                              ClientEP, ReportError);
 
   RPCEndpoint ServerEP(*Channels.second, true);
 
@@ -556,9 +557,8 @@ TEST(RemoteObjectLayer, EmitAndFinalizeFailure) {
        MockObjectLayer::SymbolLookupTable &SymTab) {
       return 1;
     });
-  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(BaseLayer,
-                                                               ServerEP,
-                                                               ReportError);
+  RemoteObjectServerLayer<MockObjectLayer, RPCEndpoint> Server(
+      AcknowledgeORCv1Deprecation, BaseLayer, ServerEP, ReportError);
 
   bool Finished = false;
   ServerEP.addHandler<remote::utils::TerminateSession>(

From 3b96ebeee473c263a743066861687b320de3e6d6 Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Wed, 17 Jul 2019 16:43:36 +0000
Subject: [PATCH 363/451] [NativePDB] Add a FromCompilerDecl for going from
 lldb -> clang

Summary:
A common transformation in NativePDB is to go from lldb types to clang
types and vice versa. This function automates one of those steps.

Differential Revision: https://reviews.llvm.org/D64851

llvm-svn: 366345
---
 lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp | 4 ++++
 lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h   | 1 +
 2 files changed, 5 insertions(+)

diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
index eb8da2a51b6cb..4991be8e70ced 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
@@ -1354,6 +1354,10 @@ PdbAstBuilder::ToCompilerDeclContext(clang::DeclContext &context) {
   return {&m_clang, &context};
 }
 
+clang::Decl * PdbAstBuilder::FromCompilerDecl(CompilerDecl decl) {
+  return static_cast<clang::Decl *>(decl.GetOpaqueDecl());
+}
+
 clang::DeclContext *
 PdbAstBuilder::FromCompilerDeclContext(CompilerDeclContext context) {
   return static_cast<clang::DeclContext *>(context.GetOpaqueDeclContext());
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h
index 8331abeaf47da..67d024741e0db 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h
@@ -76,6 +76,7 @@ class PdbAstBuilder {
   CompilerDecl ToCompilerDecl(clang::Decl &decl);
   CompilerType ToCompilerType(clang::QualType qt);
   CompilerDeclContext ToCompilerDeclContext(clang::DeclContext &context);
+  clang::Decl * FromCompilerDecl(CompilerDecl decl);
   clang::DeclContext *FromCompilerDeclContext(CompilerDeclContext context);
 
   ClangASTContext &clang() { return m_clang; }

From 7f24757b8eb2a3e698a55e6a32c79829c4025bde Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 17 Jul 2019 16:47:02 +0000
Subject: [PATCH 364/451] [CMake] Move standalone check so we don't have to
 reconfigure LLDB

By moving the standalone check into the main CMake file, the whole file
is ignored in a regular (non-standalone) build. This means that you can
make changes to LLDBStandalone.cmake without having to reconfigure a
build in a different directory. This matters when you share one source
repository with different build directories (e.g. release-assert, debug,
standalone).

Differential revision: https://reviews.llvm.org/D64824

llvm-svn: 366346
---
 lldb/CMakeLists.txt                     |   7 +-
 lldb/cmake/modules/LLDBStandalone.cmake | 195 ++++++++++++------------
 2 files changed, 102 insertions(+), 100 deletions(-)

diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt
index 3217cb14384c2..878faaa969843 100644
--- a/lldb/CMakeLists.txt
+++ b/lldb/CMakeLists.txt
@@ -11,7 +11,12 @@ set(CMAKE_MODULE_PATH
   "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
   )
 
-include(LLDBStandalone)
+# If we are not building as part of LLVM, build LLDB as a standalone project,
+# using LLVM as an external library.
+if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+  include(LLDBStandalone)
+endif()
+
 include(LLDBConfig)
 include(AddLLDB)
 
diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 803f6bda968b2..2d7ee2574e3f7 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -1,3 +1,30 @@
+project(lldb)
+
+option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF)
+
+set(LLDB_PATH_TO_LLVM_BUILD "" CACHE PATH "Path to LLVM build tree")
+set(LLDB_PATH_TO_CLANG_BUILD "${LLDB_PATH_TO_LLVM_BUILD}" CACHE PATH "Path to Clang build tree")
+
+file(TO_CMAKE_PATH "${LLDB_PATH_TO_LLVM_BUILD}" LLDB_PATH_TO_LLVM_BUILD)
+file(TO_CMAKE_PATH "${LLDB_PATH_TO_CLANG_BUILD}" LLDB_PATH_TO_CLANG_BUILD)
+
+find_package(LLVM REQUIRED CONFIG
+  HINTS "${LLDB_PATH_TO_LLVM_BUILD}" NO_CMAKE_FIND_ROOT_PATH)
+find_package(Clang REQUIRED CONFIG
+  HINTS "${LLDB_PATH_TO_CLANG_BUILD}" NO_CMAKE_FIND_ROOT_PATH)
+
+# We set LLVM_CMAKE_PATH so that GetSVN.cmake is found correctly when building SVNVersion.inc
+set(LLVM_CMAKE_PATH ${LLVM_CMAKE_DIR} CACHE PATH "Path to LLVM CMake modules")
+
+set(LLVM_MAIN_SRC_DIR ${LLVM_BUILD_MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
+set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_INCLUDE_DIR} CACHE PATH "Path to llvm/include")
+set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR} CACHE PATH "Path to LLVM build tree")
+
+set(lit_file_name "llvm-lit")
+if(CMAKE_HOST_WIN32 AND NOT CYGWIN)
+  set(lit_file_name "${lit_file_name}.py")
+endif()
+
 function(append_configuration_directories input_dir output_dirs)
   set(dirs_list ${input_dir})
   foreach(config_type ${LLVM_CONFIGURATION_TYPES})
@@ -7,118 +34,88 @@ function(append_configuration_directories input_dir output_dirs)
   set(${output_dirs} ${dirs_list} PARENT_SCOPE)
 endfunction()
 
-# If we are not building as a part of LLVM, build LLDB as an
-# standalone project, using LLVM as an external library:
-if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
-  project(lldb)
-
-  option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF)
-
-  set(LLDB_PATH_TO_LLVM_BUILD "" CACHE PATH "Path to LLVM build tree")
-  set(LLDB_PATH_TO_CLANG_BUILD "${LLDB_PATH_TO_LLVM_BUILD}" CACHE PATH "Path to Clang build tree")
-
-  file(TO_CMAKE_PATH "${LLDB_PATH_TO_LLVM_BUILD}" LLDB_PATH_TO_LLVM_BUILD)
-  file(TO_CMAKE_PATH "${LLDB_PATH_TO_CLANG_BUILD}" LLDB_PATH_TO_CLANG_BUILD)
-
-  find_package(LLVM REQUIRED CONFIG
-    HINTS "${LLDB_PATH_TO_LLVM_BUILD}" NO_CMAKE_FIND_ROOT_PATH)
-  find_package(Clang REQUIRED CONFIG
-    HINTS "${LLDB_PATH_TO_CLANG_BUILD}" NO_CMAKE_FIND_ROOT_PATH)
 
-  # We set LLVM_CMAKE_PATH so that GetSVN.cmake is found correctly when building SVNVersion.inc
-  set(LLVM_CMAKE_PATH ${LLVM_CMAKE_DIR} CACHE PATH "Path to LLVM CMake modules")
+append_configuration_directories(${LLVM_TOOLS_BINARY_DIR} config_dirs)
+find_program(lit_full_path ${lit_file_name} ${config_dirs} NO_DEFAULT_PATH)
+set(LLVM_DEFAULT_EXTERNAL_LIT ${lit_full_path} CACHE PATH "Path to llvm-lit")
 
-  set(LLVM_MAIN_SRC_DIR ${LLVM_BUILD_MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
-  set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_INCLUDE_DIR} CACHE PATH "Path to llvm/include")
-  set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR} CACHE PATH "Path to LLVM build tree")
-
-  set(lit_file_name "llvm-lit")
-  if(CMAKE_HOST_WIN32 AND NOT CYGWIN)
-    set(lit_file_name "${lit_file_name}.py")
+if(CMAKE_CROSSCOMPILING)
+  set(LLVM_NATIVE_BUILD "${LLDB_PATH_TO_LLVM_BUILD}/NATIVE")
+  if (NOT EXISTS "${LLVM_NATIVE_BUILD}")
+    message(FATAL_ERROR
+      "Attempting to cross-compile LLDB standalone but no native LLVM build
+      found. Please cross-compile LLVM as well.")
   endif()
 
-  append_configuration_directories(${LLVM_TOOLS_BINARY_DIR} config_dirs)
-  find_program(lit_full_path ${lit_file_name} ${config_dirs} NO_DEFAULT_PATH)
-  set(LLVM_DEFAULT_EXTERNAL_LIT ${lit_full_path} CACHE PATH "Path to llvm-lit")
-
-  if(CMAKE_CROSSCOMPILING)
-    set(LLVM_NATIVE_BUILD "${LLDB_PATH_TO_LLVM_BUILD}/NATIVE")
-    if (NOT EXISTS "${LLVM_NATIVE_BUILD}")
-      message(FATAL_ERROR
-        "Attempting to cross-compile LLDB standalone but no native LLVM build
-        found. Please cross-compile LLVM as well.")
-    endif()
-
-    if (CMAKE_HOST_SYSTEM_NAME MATCHES "Windows")
-      set(HOST_EXECUTABLE_SUFFIX ".exe")
-    endif()
-
-    if (NOT CMAKE_CONFIGURATION_TYPES)
-      set(LLVM_TABLEGEN_EXE
-        "${LLVM_NATIVE_BUILD}/bin/llvm-tblgen${HOST_EXECUTABLE_SUFFIX}")
-    else()
-      # NOTE: LLVM NATIVE build is always built Release, as is specified in
-      # CrossCompile.cmake
-      set(LLVM_TABLEGEN_EXE
-        "${LLVM_NATIVE_BUILD}/Release/bin/llvm-tblgen${HOST_EXECUTABLE_SUFFIX}")
-    endif()
-  else()
-    set(tblgen_file_name "llvm-tblgen${CMAKE_EXECUTABLE_SUFFIX}")
-    append_configuration_directories(${LLVM_TOOLS_BINARY_DIR} config_dirs)
-    find_program(LLVM_TABLEGEN_EXE ${tblgen_file_name} ${config_dirs} NO_DEFAULT_PATH)
+  if (CMAKE_HOST_SYSTEM_NAME MATCHES "Windows")
+    set(HOST_EXECUTABLE_SUFFIX ".exe")
   endif()
 
-  # They are used as destination of target generators.
-  set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin)
-  set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
-  if(WIN32 OR CYGWIN)
-    # DLL platform -- put DLLs into bin.
-    set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_RUNTIME_OUTPUT_INTDIR})
+  if (NOT CMAKE_CONFIGURATION_TYPES)
+    set(LLVM_TABLEGEN_EXE
+      "${LLVM_NATIVE_BUILD}/bin/llvm-tblgen${HOST_EXECUTABLE_SUFFIX}")
   else()
-    set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_LIBRARY_OUTPUT_INTDIR})
+    # NOTE: LLVM NATIVE build is always built Release, as is specified in
+    # CrossCompile.cmake
+    set(LLVM_TABLEGEN_EXE
+      "${LLVM_NATIVE_BUILD}/Release/bin/llvm-tblgen${HOST_EXECUTABLE_SUFFIX}")
   endif()
+else()
+  set(tblgen_file_name "llvm-tblgen${CMAKE_EXECUTABLE_SUFFIX}")
+  append_configuration_directories(${LLVM_TOOLS_BINARY_DIR} config_dirs)
+  find_program(LLVM_TABLEGEN_EXE ${tblgen_file_name} ${config_dirs} NO_DEFAULT_PATH)
+endif()
 
-  # We append the directory in which LLVMConfig.cmake lives. We expect LLVM's
-  # CMake modules to be in that directory as well.
-  list(APPEND CMAKE_MODULE_PATH "${LLVM_DIR}")
-  include(AddLLVM)
-  include(TableGen)
-  include(HandleLLVMOptions)
-  include(CheckAtomic)
-  include(LLVMDistributionSupport)
-
-  set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 2.7)
-  if (PYTHON_EXECUTABLE STREQUAL "")
-    include(FindPythonInterp)
-    if( NOT PYTHONINTERP_FOUND )
-      message(FATAL_ERROR
-              "Unable to find Python interpreter, required for builds and testing.
-               Please install Python or specify the PYTHON_EXECUTABLE CMake variable.")
-    endif()
-  else()
-    message(STATUS "Found PythonInterp: ${PYTHON_EXECUTABLE}")
+# They are used as destination of target generators.
+set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin)
+set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX})
+if(WIN32 OR CYGWIN)
+  # DLL platform -- put DLLs into bin.
+  set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_RUNTIME_OUTPUT_INTDIR})
+else()
+  set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_LIBRARY_OUTPUT_INTDIR})
+endif()
+
+# We append the directory in which LLVMConfig.cmake lives. We expect LLVM's
+# CMake modules to be in that directory as well.
+list(APPEND CMAKE_MODULE_PATH "${LLVM_DIR}")
+include(AddLLVM)
+include(TableGen)
+include(HandleLLVMOptions)
+include(CheckAtomic)
+include(LLVMDistributionSupport)
+
+set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 2.7)
+if (PYTHON_EXECUTABLE STREQUAL "")
+  include(FindPythonInterp)
+  if( NOT PYTHONINTERP_FOUND )
+    message(FATAL_ERROR
+            "Unable to find Python interpreter, required for builds and testing.
+              Please install Python or specify the PYTHON_EXECUTABLE CMake variable.")
   endif()
+else()
+  message(STATUS "Found PythonInterp: ${PYTHON_EXECUTABLE}")
+endif()
 
-  set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}")
-  set(LLVM_INCLUDE_TESTS ON CACHE INTERNAL "")
+set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}")
+set(LLVM_INCLUDE_TESTS ON CACHE INTERNAL "")
 
-  option(LLVM_USE_FOLDERS "Enable solution folders in Visual Studio. Disable for Express versions." ON)
-  if(LLVM_USE_FOLDERS)
-    set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-  endif()
+option(LLVM_USE_FOLDERS "Enable solution folders in Visual Studio. Disable for Express versions." ON)
+if(LLVM_USE_FOLDERS)
+  set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+endif()
 
-  set_target_properties(clang-tablegen-targets PROPERTIES FOLDER "lldb misc")
-  set_target_properties(intrinsics_gen PROPERTIES FOLDER "lldb misc")
+set_target_properties(clang-tablegen-targets PROPERTIES FOLDER "lldb misc")
+set_target_properties(intrinsics_gen PROPERTIES FOLDER "lldb misc")
 
-  set(CMAKE_INCLUDE_CURRENT_DIR ON)
-  include_directories(
-    "${CMAKE_BINARY_DIR}/include"
-    "${LLVM_INCLUDE_DIRS}"
-    "${CLANG_INCLUDE_DIRS}")
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+include_directories(
+  "${CMAKE_BINARY_DIR}/include"
+  "${LLVM_INCLUDE_DIRS}"
+  "${CLANG_INCLUDE_DIRS}")
 
-  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
-  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
 
-  set(LLDB_BUILT_STANDALONE 1)
-endif()
+set(LLDB_BUILT_STANDALONE 1)

From 3fce6b5da169c77c4047a4632f28b41d33d7c0a9 Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Wed, 17 Jul 2019 16:51:16 +0000
Subject: [PATCH 365/451] [lldb] Make log for ClangModulesDeclVendor's compiler
 flag less verbose

Summary:
Currently the ClangModulesDeclVendor is spamming the expression log with the compiler flags it is using, which creates a log that looks like this:

```
clang

 -fmodules

 -fimplicit-module-maps
```

This patch removes all these newlines and just prints the compiler flags in one line as you see in the command line:

```
clang -fmodules -fimplicit-module-maps [...]
```

Reviewers: shafik, davide

Reviewed By: davide

Subscribers: davide, abidh, lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D64858

llvm-svn: 366347
---
 .../Clang/ClangModulesDeclVendor.cpp            | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp
index 941306fc0c307..4a220790e50dc 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.cpp
@@ -632,21 +632,16 @@ ClangModulesDeclVendor::Create(Target &target) {
       clang::CompilerInstance::createDiagnostics(new clang::DiagnosticOptions,
                                                  new StoringDiagnosticConsumer);
 
-  Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
-  if (log)
-    log->PutString("ClangModulesDeclVendor::Create()");
   std::vector<const char *> compiler_invocation_argument_cstrs;
   compiler_invocation_argument_cstrs.reserve(
       compiler_invocation_arguments.size());
-  for (const std::string &arg : compiler_invocation_arguments) {
+  for (const std::string &arg : compiler_invocation_arguments)
     compiler_invocation_argument_cstrs.push_back(arg.c_str());
-    if (log) {
-      log->PutString("\n  ");
-      log->PutString(arg);
-    }
-  }
-  if (log)
-    log->PutString("\n");
+
+  Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
+  LLDB_LOG(log, "ClangModulesDeclVendor's compiler flags {0:$[ ]}",
+           llvm::make_range(compiler_invocation_arguments.begin(),
+                            compiler_invocation_arguments.end()));
 
   std::shared_ptr<clang::CompilerInvocation> invocation =
       clang::createInvocationFromCommandLine(compiler_invocation_argument_cstrs,

From d912a9ba9b1647984dc65d34ba4422874bc28855 Mon Sep 17 00:00:00 2001
From: Daniil Fukalov <daniil.fukalov@amd.com>
Date: Wed, 17 Jul 2019 16:51:29 +0000
Subject: [PATCH 366/451] [AMDGPU] Tune inlining parameters for AMDGPU target

Summary:
Since the target gains no significant advantage from vectorization,
the vector instructions threshold bonus should be optional.

The amdgpu-inline-arg-alloca-cost parameter default value and the target
InliningThresholdMultiplier value are then tuned accordingly.

Reviewers: arsenm, rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, eraman, hiraditya, haicheng, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64642

llvm-svn: 366348
---
 .../llvm/Analysis/TargetTransformInfo.h       | 16 ++++++++++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  2 ++
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  2 ++
 llvm/lib/Analysis/InlineCost.cpp              | 11 +------
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  4 +++
 llvm/lib/Target/AMDGPU/AMDGPUInline.cpp       |  2 +-
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  4 ++-
 llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll     |  7 -----
 .../Inline/AMDGPU/inline-amdgpu-vecbonus.ll   | 31 +++++++++++++++++++
 9 files changed, 60 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index af1a12dc18dea..7574b811bc1c4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -263,6 +263,18 @@ class TargetTransformInfo {
   /// individual classes of instructions would be better.
   unsigned getInliningThresholdMultiplier() const;
 
+  /// \returns Vector bonus in percent.
+  ///
+  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
+  /// and apply this bonus based on the percentage of vector instructions. A
+  /// bonus is applied if the vector instructions exceed 50% and half that amount
+  /// is applied if it exceeds 10%. Note that these bonuses are some what
+  /// arbitrary and evolved over time by accident as much as because they are
+  /// principled bonuses.
+  /// FIXME: It would be nice to base the bonus values on something more
+  /// scientific. A target may have no bonus on vector instructions.
+  int getInlinerVectorBonusPercent() const;
+
   /// Estimate the cost of an intrinsic when lowered.
   ///
   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
@@ -1128,6 +1140,7 @@ class TargetTransformInfo::Concept {
   virtual int getCallCost(const Function *F,
                           ArrayRef<const Value *> Arguments, const User *U) = 0;
   virtual unsigned getInliningThresholdMultiplier() = 0;
+  virtual int getInlinerVectorBonusPercent() = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                ArrayRef<Type *> ParamTys, const User *U) = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
@@ -1351,6 +1364,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   unsigned getInliningThresholdMultiplier() override {
     return Impl.getInliningThresholdMultiplier();
   }
+  int getInlinerVectorBonusPercent() override {
+    return Impl.getInlinerVectorBonusPercent();
+  }
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
     return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index a9383e795fca4..b99e1eb9adf05 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -140,6 +140,8 @@ class TargetTransformInfoImplBase {
 
   unsigned getInliningThresholdMultiplier() { return 1; }
 
+  int getInlinerVectorBonusPercent() { return 150; }
+
   unsigned getMemcpyCost(const Instruction *I) {
     return TTI::TCC_Expensive;
   }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c2d050d9ec855..70bf670fdf0bc 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -427,6 +427,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
   unsigned getInliningThresholdMultiplier() { return 1; }
 
+  int getInlinerVectorBonusPercent() { return 150; }
+
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP) {
     // This unrolling functionality is target independent, but to provide some
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 3cb56f8cccf59..0dec146e04656 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -880,15 +880,6 @@ void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
   // basic block at the given callsite context. This is speculatively applied
   // and withdrawn if more than one basic block is seen.
   //
-  // Vector bonuses: We want to more aggressively inline vector-dense kernels
-  // and apply this bonus based on the percentage of vector instructions. A
-  // bonus is applied if the vector instructions exceed 50% and half that amount
-  // is applied if it exceeds 10%. Note that these bonuses are some what
-  // arbitrary and evolved over time by accident as much as because they are
-  // principled bonuses.
-  // FIXME: It would be nice to base the bonus values on something more
-  // scientific.
-  //
   // LstCallToStaticBonus: This large bonus is applied to ensure the inlining
   // of the last call to a static function as inlining such functions is
   // guaranteed to reduce code size.
@@ -896,7 +887,7 @@ void CallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
   // These bonus percentages may be set to 0 based on properties of the caller
   // and the callsite.
   int SingleBBBonusPercent = 50;
-  int VectorBonusPercent = 150;
+  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
   int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
 
   // Lambda to set all the above bonus and bonus percentages to 0.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 50c5ae9c19fa8..eb04c34453fb3 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -176,6 +176,10 @@ unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
   return TTIImpl->getInliningThresholdMultiplier();
 }
 
+int TargetTransformInfo::getInlinerVectorBonusPercent() const {
+  return TTIImpl->getInlinerVectorBonusPercent();
+}
+
 int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                     ArrayRef<const Value *> Operands) const {
   return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
index ec0dd6df44abd..f4df20b8f03ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
 #define DEBUG_TYPE "inline"
 
 static cl::opt<int>
-ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
+ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(1500),
               cl::desc("Cost of alloca argument"));
 
 // If the amount of scratch memory to eliminate exceeds our ability to allocate
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 72882c83c01c1..6f1bf5a26f0d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -191,7 +191,9 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
-  unsigned getInliningThresholdMultiplier() { return 9; }
+  unsigned getInliningThresholdMultiplier() { return 7; }
+
+  int getInlinerVectorBonusPercent() { return 0; }
 
   int getArithmeticReductionCost(unsigned Opcode,
                                  Type *Ty,
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
index 75c16d006ae4c..c2f1836f44afd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
@@ -28,15 +28,8 @@ if.end:                                           ; preds = %if.then, %entry
 define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
 entry:
   %tmp1 = load float, float addrspace(5)* %p1, align 4
-  %cmp = fcmp ogt float %tmp1, 1.000000e+00
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:                                          ; preds = %entry
   %div = fdiv float 2.000000e+00, %tmp1
   store float %div, float addrspace(5)* %p2, align 4
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %entry
   ret void
 }
 
diff --git a/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll b/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll
new file mode 100644
index 0000000000000..cf28d4fe4ab5f
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-vecbonus.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-inline --inline-threshold=1 < %s | FileCheck %s
+
+define hidden <16 x i32> @div_vecbonus(<16 x i32> %x, <16 x i32> %y) {
+entry:
+  %div.1 = udiv <16 x i32> %x, %y
+  %div.2 = udiv <16 x i32> %div.1, %y
+  %div.3 = udiv <16 x i32> %div.2, %y
+  %div.4 = udiv <16 x i32> %div.3, %y
+  %div.5 = udiv <16 x i32> %div.4, %y
+  %div.6 = udiv <16 x i32> %div.5, %y
+  %div.7 = udiv <16 x i32> %div.6, %y
+  %div.8 = udiv <16 x i32> %div.7, %y
+  %div.9 = udiv <16 x i32> %div.8, %y
+  %div.10 = udiv <16 x i32> %div.9, %y
+  %div.11 = udiv <16 x i32> %div.10, %y
+  %div.12 = udiv <16 x i32> %div.11, %y
+  ret <16 x i32> %div.12
+}
+
+; CHECK-LABEL: define amdgpu_kernel void @caller_vecbonus
+; CHECK-NOT: udiv
+; CHECK: tail call <16 x i32> @div_vecbonus
+; CHECK: ret void
+define amdgpu_kernel void @caller_vecbonus(<16 x i32> addrspace(1)* nocapture %x, <16 x i32> addrspace(1)* nocapture readonly %y) {
+entry:
+  %tmp = load <16 x i32>, <16 x i32> addrspace(1)* %x
+  %tmp1 = load <16 x i32>, <16 x i32> addrspace(1)* %y
+  %div.i = tail call <16 x i32> @div_vecbonus(<16 x i32> %tmp, <16 x i32> %tmp1)
+  store <16 x i32> %div.i, <16 x i32> addrspace(1)* %x
+  ret void
+}

From 59532488b1d65c52aa5122bd7e191fde9a9df589 Mon Sep 17 00:00:00 2001
From: Ron Lieberman <ronlieb.g@gmail.com>
Date: Wed, 17 Jul 2019 17:07:52 +0000
Subject: [PATCH 367/451] [OPENMP] Resolve lost LoopTripCnt for subsequent
 loops in same thread.

Remove loopTripCnt from threaded device stack after consuming it.
Added a libomptarget DP message to aid in future debugging and to
validate the added testcase, which only runs in Debug build.

Differential Revision: https://reviews.llvm.org/D64808

llvm-svn: 366349
---
 openmp/libomptarget/src/omptarget.cpp         |  7 ++--
 .../test/offloading/looptripcnt.c             | 36 +++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 openmp/libomptarget/test/offloading/looptripcnt.c

diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 39b5cb7da861f..c41bf3167ccc1 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -732,8 +732,11 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
   uint64_t ltc = 0;
   TblMapMtx.lock();
   auto I = Device.LoopTripCnt.find(__kmpc_global_thread_num(NULL));
-  if (I != Device.LoopTripCnt.end())
-    std::swap(ltc, I->second);
+  if (I != Device.LoopTripCnt.end()) {
+    ltc = I->second;
+    Device.LoopTripCnt.erase(I);
+    DP("loop trip count is %lu.\n", ltc);
+  }
   TblMapMtx.unlock();
 
   // Launch device execution.
diff --git a/openmp/libomptarget/test/offloading/looptripcnt.c b/openmp/libomptarget/test/offloading/looptripcnt.c
new file mode 100644
index 0000000000000..025231b0c6d32
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/looptripcnt.c
@@ -0,0 +1,36 @@
+// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG
+// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
+// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
+// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG
+// REQUIRES: libomptarget-debug
+
+/*
+  Test for looptripcount being popped from runtime stack.
+*/
+#include <stdio.h>
+#include <omp.h>
+int main()
+{
+  int N = 128;
+  int NN = 1024;
+  int num_teams[NN];
+  int num_threads[NN];
+
+  printf("#pragma omp target teams distribute parallel for thread_limit(4)\n");
+#pragma omp target teams distribute parallel for thread_limit(4)
+  for (int j = 0; j< N; j++) {
+    num_threads[j] = omp_get_num_threads();
+    num_teams[j] = omp_get_num_teams();
+  }
+  printf("num_threads %d num_teams %d\n", num_threads[0], num_teams[0]);
+// DEBUG: loop trip count is 128
+  printf("#pragma omp target teams distribute parallel for\n");
+#pragma omp target teams distribute parallel for
+  for (int j = 0; j< N; j++) {
+    num_threads[j] = omp_get_num_threads();
+    num_teams[j] = omp_get_num_teams();
+  }
+  printf("num_threads %d num_teams %d\n", num_threads[0], num_teams[0]);
+// DEBUG: loop trip count is 128
+  return 0;
+}

From ee24b40b9e9f8355f1cb6c92f6406396f9b3042d Mon Sep 17 00:00:00 2001
From: Stefan Granitz <stefan.graenitz@gmail.com>
Date: Wed, 17 Jul 2019 17:14:40 +0000
Subject: [PATCH 368/451] [CMake] Avoid liblldb genex when figuring out the
 copy destination for framework tools

This genex created an order-only dependency to liblldb for every framework tool. It reduced build throughput in the first half of the compilation and pulled in unnecessary build units, e.g. debugserver required ~900 build units. With this change debugserver is (again) down at 52 build units!

llvm-svn: 366350
---
 lldb/cmake/modules/AddLLDB.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake
index e588232986738..540d01362f0d8 100644
--- a/lldb/cmake/modules/AddLLDB.cmake
+++ b/lldb/cmake/modules/AddLLDB.cmake
@@ -199,7 +199,8 @@ endfunction()
 function(lldb_add_to_buildtree_lldb_framework name subdir)
   # Destination for the copy in the build-tree. While the framework target may
   # not exist yet, it will exist when the generator expression gets expanded.
-  set(copy_dest "$<TARGET_FILE_DIR:liblldb>/../../../${subdir}")
+  get_target_property(framework_build_dir liblldb LIBRARY_OUTPUT_DIRECTORY)
+  set(copy_dest "${framework_build_dir}/${subdir}")
 
   # Copy into the given subdirectory for testing.
   add_custom_command(TARGET ${name} POST_BUILD

From 79f4e4770b72131e53cace75dbd62f5ed43c39bd Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Wed, 17 Jul 2019 17:21:31 +0000
Subject: [PATCH 369/451] [Docs][OpenCL] Documentation of C++ for OpenCL mode

Added documentation of C++ for OpenCL mode into Clang
User Manual and Language Extensions document.

Differential Revision: https://reviews.llvm.org/D64418

llvm-svn: 366351
---
 clang/docs/LanguageExtensions.rst | 269 ++++++++++++++++++++++++++++++
 clang/docs/UsersManual.rst        |  42 ++++-
 2 files changed, 310 insertions(+), 1 deletion(-)

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 44fa2b2ec0980..cb72c459c1e5b 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1518,6 +1518,275 @@ parameters of protocol-qualified type.
 Query the presence of this new mangling with
 ``__has_feature(objc_protocol_qualifier_mangling)``.
 
+
+OpenCL Features
+===============
+
+C++ for OpenCL
+--------------
+
+This functionality is built on top of OpenCL C v2.0 and C++17. Regular C++
+features can be used in OpenCL kernel code. All functionality from OpenCL C
+is inherited. This section describes minor differences to OpenCL C and any
+limitations related to C++ support as well as interactions between OpenCL and
+C++ features that are not documented elsewhere.
+
+Restrictions to C++17
+^^^^^^^^^^^^^^^^^^^^^
+
+The following features are not supported:
+
+- Virtual functions
+- ``dynamic_cast`` operator
+- Non-placement ``new``/``delete`` operators
+- Standard C++ libraries. Currently there is no solution for alternative C++
+  libraries provided. Future release will feature library support.
+
+
+Interplay of OpenCL and C++ features
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Address space behavior
+""""""""""""""""""""""
+
+Address spaces are part of the type qualifiers; many rules are just inherited
+from the qualifier behavior documented in OpenCL C v2.0 s6.5 and Embedded C
+extension ISO/IEC JTC1 SC22 WG14 N1021 s3.1. Note that since the address space
+behavior in C++ is not documented formally yet, Clang extends existing concept
+from C and OpenCL. For example conversion rules are extended from qualification
+conversion but the compatibility is determined using sets and overlapping from
+Embedded C (ISO/IEC JTC1 SC22 WG14 N1021 s3.1.3). For OpenCL it means that
+implicit conversions are allowed from named to ``__generic`` but not vice versa
+(OpenCL C v2.0 s6.5.5) except for ``__constant`` address space. Most of the
+rules are built on top of this behavior.
+
+**Casts**
+
+C style casts will follow OpenCL C v2.0 rules (s6.5.5). All cast operators will
+permit implicit conversion to ``__generic``. However, converting from ``__generic``
+to named address spaces can only be done using ``addrspace_cast``. Note that
+conversions between ``__constant`` and any other address space are still disallowed.
+
+.. _opencl_cpp_addrsp_deduction:
+
+**Deduction**
+
+Address spaces are not deduced for:
+
+- non-pointer/non-reference template parameters or any dependent types except
+  for template specializations.
+- non-pointer/non-reference class members except for static data members that are
+  deduced to ``__global`` address space.
+- non-pointer/non-reference alias declarations.
+- ``decltype`` expression.
+
+.. code-block:: c++
+
+  template <typename T>
+  void foo() {
+    T m; // address space of m will be known at template instantiation time.
+    T * ptr; // ptr points to __generic address space object.
+    T & ref = ...; // ref references an object in __generic address space.
+  };
+
+  template <int N>
+  struct S {
+    int i; // i has no address space
+    static int ii; // ii is in global address space
+    int * ptr; // ptr points to __generic address space int.
+    int & ref = ...; // ref references int in __generic address space.
+  };
+
+  template <int N>
+  void bar()
+  {
+    S<N> s; // s is in __private address space
+  }
+
+TODO: Add example for type alias and decltype!
+
+**References**
+
+Reference types can be qualified with an address space.
+
+.. code-block:: c++
+
+  __private int & ref = ...; // references int in __private address space
+
+By default references will refer to ``__generic`` address space objects, except
+for dependent types that are not template specializations
+(see :ref:`Deduction <opencl_cpp_addrsp_deduction>`). Address space compatibility
+checks are performed when references are bound to values. The logic follows the
+rules from address space pointer conversion (OpenCL v2.0 s6.5.5).
+
+**Default address space**
+
+All non-static member functions take an implicit object parameter ``this`` that
+is a pointer type. By default this pointer parameter is in ``__generic`` address
+space. All concrete objects passed as an argument to ``this`` parameter will be
+converted to ``__generic`` address space first if the conversion is valid.
+Therefore programs using objects in ``__constant`` address space won't be compiled
+unless address space is explicitly specified using address space qualifiers on
+member functions
+(see :ref:`Member function qualifier <opencl_cpp_addrspace_method_qual>`) as the
+conversion between ``__constant`` and ``__generic`` is disallowed. Member function
+qualifiers can also be used in case conversion to ``__generic`` address space is
+undesirable (even if it is legal), for example to take advantage of memory bank
+accesses. Note this not only applies to regular member functions but to
+constructors and destructors too.
+
+.. _opencl_cpp_addrspace_method_qual:
+
+**Member function qualifier**
+
+Clang allows specifying address space qualifier on member functions to signal that
+they are to be used with objects constructed in some specific address space. This
+works just the same as qualifying member functions with ``const`` or any other
+qualifiers. Overload resolution will select the overload with the most specific
+address space if multiple candidates are provided. If there is no conversion
+to an address space among the existing overloads, compilation will fail with a
+diagnostic.
+
+.. code-block:: c++
+
+ struct C {
+    void foo() __local;
+    void foo();
+ };
+
+ __kernel void bar() {
+   __local C c1;
+   C c2;
+   __constant C c3;
+   c1.foo(); // will resolve to the first foo
+   c2.foo(); // will resolve to the second foo
+   c3.foo(); // error due to mismatching address spaces - can't convert to
+             // __local or __generic
+ }
+
+**Implicit special members**
+
+All implicit special members (default, copy, or move constructor, copy or move
+assignment, destructor) will be generated with ``__generic`` address space.
+
+.. code-block:: c++
+
+  class C {
+    // Has the following implicit definition
+    // void C() __generic;
+    // void C(const __generic C &) __generic;
+    // void C(__generic C &&) __generic;
+    // operator= '__generic C &(__generic C &&)'
+    // operator= '__generic C &(const __generic C &) __generic
+  }
+
+**Builtin operators**
+
+All builtin operators are available in the specific address spaces, thus no conversion
+to ``__generic`` is performed.
+
+**Templates**
+
+There is no deduction of address spaces in non-pointer/non-reference template parameters
+and dependent types (see :ref:`Deduction <opencl_cpp_addrsp_deduction>`). The address
+space of template parameter is deduced during the type deduction if it's not explicitly
+provided in instantiation.
+
+.. code-block:: c++
+
+  1 template<typename T>
+  2 void foo(T* i){
+  3   T var;
+  4 }
+  5
+  6 __global int g;
+  7 void bar(){
+  8   foo(&g); // error: template instantiation failed as function scope variable appears to
+  9            // be declared in __global address space (see line 3)
+ 10 }
+
+It is not legal to specify multiple different address spaces between template definition and
+instantiation. If multiple different address spaces are specified in template definition and
+instantiation compilation of such program will fail with a diagnostic.
+
+.. code-block:: c++
+
+  template <typename T>
+  void foo() {
+    __private T var;
+  }
+
+  void bar() {
+    foo<__global int>(); // error: conflicting address space qualifiers are provided __global
+                         // and __private
+  }
+
+Once template is instantiated regular restrictions for address spaces will apply.
+
+.. code-block:: c++
+
+  template<typename T>
+  void foo(){
+    T var;
+  }
+
+  void bar(){
+    foo<__global int>(); // error: function scope variable cannot be declared in __global
+                         // address space
+  }
+
+**Temporary materialization**
+
+All temporaries are materialized in ``__private`` address space. If a reference with some
+other address space is bound to them, the conversion will be generated in case it's valid
+otherwise compilation will fail with a diagnostic.
+
+.. code-block:: c++
+
+  int bar(const unsigned int &i);
+
+  void foo() {
+    bar(1); // temporary is created in __private address space but converted
+            // to __generic address space of parameter reference
+  }
+
+  __global const int& f(__global float &ref) {
+    return ref; // error: address space mismatch between temporary object
+                // created to hold value converted float->int and return
+                // value type (can't convert from __private to __global)
+  }
+
+**Initialization of local and constant address space objects**
+
+TODO
+
+Constructing and destroying global objects
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Global objects are constructed before the first kernel using the global
+objects is executed and destroyed just after the last kernel using the
+program objects is executed. In OpenCL v2.0 drivers there is no specific
+API for invoking global constructors. However, an easy workaround would be
+to enqueue constructor initialization kernel that has a name
+``@_GLOBAL__sub_I_<compiled file name>``. This kernel is only present if there
+are any global objects to be initialized in the compiled binary. One way to
+check this is by passing ``CL_PROGRAM_KERNEL_NAMES`` to ``clGetProgramInfo``
+(OpenCL v2.0 s5.8.7).
+
+Note that if multiple files are compiled and linked into libraries multiple
+kernels that initialize global objects for multiple modules would have to be
+invoked.
+
+.. code-block:: console
+
+ clang -cl-std=c++ test.cl
+
+If there are any global objects to be initialized the final binary will
+contain ``@_GLOBAL__sub_I_test.cl`` kernel to be enqueued.
+
+Global destructors can not be invoked in OpenCL v2.0 drivers. However, all
+memory used for program scope objects is released on ``clReleaseProgram``.
+
 Initializer lists for complex numbers in C
 ==========================================
 
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 001003a3fa6fc..2fbb414f6820c 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2397,7 +2397,8 @@ Compiling to bitcode can be done as follows:
 This will produce a generic test.bc file that can be used in vendor toolchains
 to perform machine code generation.
 
-Clang currently supports OpenCL C language standards up to v2.0.
+Clang currently supports OpenCL C language standards up to v2.0. Starting from Clang 9
+C++ mode is available for OpenCL (see :ref:`C++ for OpenCL <opencl_cpp>`).
 
 OpenCL Specific Options
 -----------------------
@@ -2756,6 +2757,45 @@ There are some standard OpenCL functions that are implemented as Clang builtins:
   enqueue query functions from `section 6.13.17.5
   <https://www.khronos.org/registry/cl/specs/opencl-2.0-openclc.pdf#171>`_.
 
+.. _opencl_cpp:
+
+C++ for OpenCL
+--------------
+
+Starting from Clang 9 kernel code can contain C++17 features: classes, templates,
+function overloading, type deduction, etc. Please note that this is not an
+implementation of `OpenCL C++
+<https://www.khronos.org/registry/OpenCL/specs/2.2/pdf/OpenCL_Cxx.pdf>`_ and
+there is no plan to support it in clang in any new releases in the near future.
+
+There are only a few restrictions on allowed C++ features. For detailed information
+please refer to documentation on Extensions (:doc:`LanguageExtensions`).
+
+Since C++ features are to be used on top of OpenCL C functionality, all existing
+restrictions from OpenCL C v2.0 will inherently apply. All OpenCL C builtin types
+and function libraries are supported and can be used in the new mode.
+
+To enable the new mode pass the following command line option when compiling ``.cl``
+file ``-cl-std=c++`` or ``-std=c++``.
+
+   .. code-block:: c++
+
+     template<class T> T add( T x, T y )
+     {
+       return x + y;
+     }
+
+     __kernel void test( __global float* a, __global float* b)
+     {
+       auto index = get_global_id(0);
+       a[index] = add(b[index], b[index+1]);
+     }
+
+
+   .. code-block:: console
+
+     clang -cl-std=c++ test.cl
+
 .. _target_features:
 
 Target-Specific Features and Limitations

From dce1954f455fc35ea3586f342db5077d505f1057 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 17 Jul 2019 17:22:29 +0000
Subject: [PATCH 370/451] [Test] Add module cache for TestWeakSymbols

Explicitly set the module cache in the Makefile with
-fmodules-cache-path.

llvm-svn: 366352
---
 .../lldbsuite/test/expression_command/weak_symbols/Makefile     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/weak_symbols/Makefile b/lldb/packages/Python/lldbsuite/test/expression_command/weak_symbols/Makefile
index 7c70b509fe737..ea434d427142a 100644
--- a/lldb/packages/Python/lldbsuite/test/expression_command/weak_symbols/Makefile
+++ b/lldb/packages/Python/lldbsuite/test/expression_command/weak_symbols/Makefile
@@ -22,5 +22,5 @@ dylib2.o: dylib.h $(SRCDIR)/dylib.c
 	$(CC)  $(CFLAGS) -c $(SRCDIR)/dylib.c -o dylib2.o
 
 main.o: dylib.h $(SRCDIR)/main.c
-	$(CC)  $(CFLAGS) -c $(SRCDIR)/main.c -fmodules
+	$(CC)  $(CFLAGS) -c $(SRCDIR)/main.c -fmodules -fmodules-cache-path=$(CLANG_MODULE_CACHE_DIR)
 

From b53e13cd43e8aacf26222186b241346484962a21 Mon Sep 17 00:00:00 2001
From: Nathan Huckleberry <nhuck@google.com>
Date: Wed, 17 Jul 2019 17:22:43 +0000
Subject: [PATCH 371/451] [clang-tidy] Fix crash on end location inside macro

Summary:
Lexer::getLocForEndOfToken is defined to return an
invalid location if the given location is inside a macro.
Other checks conditionally warn based off location
validity. Updating this check to do the same.

Reviewers: JonasToth, aaron.ballman, nickdesaulniers

Reviewed By: nickdesaulniers

Subscribers: lebedev.ri, nickdesaulniers, xazax.hun, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64607

llvm-svn: 366353
---
 .../clang-tidy/bugprone/BranchCloneCheck.cpp    | 17 +++++++++++------
 .../bugprone-branch-clone-macro-crash.c         | 14 ++++++++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)
 create mode 100644 clang-tools-extra/test/clang-tidy/bugprone-branch-clone-macro-crash.c

diff --git a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
index a89831188b7e2..eb54aaa994457 100644
--- a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
@@ -132,9 +132,12 @@ void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
           // We report the first occurence only when we find the second one.
           diag(Branches[i]->getBeginLoc(),
                "repeated branch in conditional chain");
-          diag(Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0,
-                                          *Result.SourceManager, getLangOpts()),
-               "end of the original", DiagnosticIDs::Note);
+          SourceLocation End =
+              Lexer::getLocForEndOfToken(Branches[i]->getEndLoc(), 0,
+                                         *Result.SourceManager, getLangOpts());
+          if (End.isValid()) {
+            diag(End, "end of the original", DiagnosticIDs::Note);
+          }
         }
 
         diag(Branches[j]->getBeginLoc(), "clone %0 starts here",
@@ -208,10 +211,12 @@ void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
 
         if (EndLoc.isMacroID())
           EndLoc = Context.getSourceManager().getExpansionLoc(EndLoc);
+        EndLoc = Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
+                                            getLangOpts());
 
-        diag(Lexer::getLocForEndOfToken(EndLoc, 0, *Result.SourceManager,
-                                        getLangOpts()),
-             "last of these clones ends here", DiagnosticIDs::Note);
+        if (EndLoc.isValid()) {
+          diag(EndLoc, "last of these clones ends here", DiagnosticIDs::Note);
+        }
       }
       BeginCurrent = EndCurrent;
     }
diff --git a/clang-tools-extra/test/clang-tidy/bugprone-branch-clone-macro-crash.c b/clang-tools-extra/test/clang-tidy/bugprone-branch-clone-macro-crash.c
new file mode 100644
index 0000000000000..ce0c0137d0a37
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/bugprone-branch-clone-macro-crash.c
@@ -0,0 +1,14 @@
+// RUN: %check_clang_tidy %s bugprone-branch-clone %t
+int x = 0;
+int y = 1;
+#define a(b, c) \
+  typeof(b) d;  \
+  if (b)        \
+    d = b;      \
+  else if (c)   \
+    d = b;
+
+f() {
+  // CHECK-MESSAGES: warning: repeated branch in conditional chain [bugprone-branch-clone]
+  a(x, y)
+}

From 337aea438c5eebe6a149e5fafe38b71ec93caf8d Mon Sep 17 00:00:00 2001
From: Julie Hockett <juliehockett@google.com>
Date: Wed, 17 Jul 2019 17:40:53 +0000
Subject: [PATCH 372/451] [clang-tidy] Exclude forward decls from
 fuchsia-multiple-inheritance

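Addresses b39770.

Only match record declarations that are definitions, so that a
forward declaration of a class no longer triggers the check.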

Differential Revision: https://reviews.llvm.org/D64813

llvm-svn: 366354
---
 .../clang-tidy/fuchsia/MultipleInheritanceCheck.cpp            | 3 ++-
 .../test/clang-tidy/fuchsia-multiple-inheritance.cpp           | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.cpp b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.cpp
index a5f9d6ed46054..404b4beddd247 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.cpp
+++ b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.cpp
@@ -93,7 +93,8 @@ void MultipleInheritanceCheck::registerMatchers(MatchFinder *Finder) {
     return;
 
   // Match declarations which have bases.
-  Finder->addMatcher(cxxRecordDecl(hasBases()).bind("decl"), this);
+  Finder->addMatcher(
+      cxxRecordDecl(allOf(hasBases(), isDefinition())).bind("decl"), this);
 }
 
 void MultipleInheritanceCheck::check(const MatchFinder::MatchResult &Result) {
diff --git a/clang-tools-extra/test/clang-tidy/fuchsia-multiple-inheritance.cpp b/clang-tools-extra/test/clang-tidy/fuchsia-multiple-inheritance.cpp
index fd2ed145c12b4..c7869761cf85e 100644
--- a/clang-tools-extra/test/clang-tidy/fuchsia-multiple-inheritance.cpp
+++ b/clang-tools-extra/test/clang-tidy/fuchsia-multiple-inheritance.cpp
@@ -41,6 +41,9 @@ class Interface_with_A_Parent : public Base_A {
   virtual int baz() = 0;
 };
 
+// Shouldn't warn on forward declarations.
+class Bad_Child1;
+
 // Inherits from multiple concrete classes.
 // CHECK-MESSAGES: [[@LINE+2]]:1: warning: inheriting mulitple classes that aren't pure virtual is discouraged [fuchsia-multiple-inheritance]
 // CHECK-NEXT: class Bad_Child1 : public Base_A, Base_B {};

From 0e2b74a2b0b875a601851f389bf4bd2fa7b33735 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov@arm.com>
Date: Wed, 17 Jul 2019 17:43:32 +0000
Subject: [PATCH 373/451] Revert [AArch64] Add support for Transactional Memory
 Extension (TME)

This reverts r366322 (git commit 4b8da3a503e434ddbc08ecf66582475765f449bc)

llvm-svn: 366355
---
 clang/include/clang/Basic/BuiltinsAArch64.def |  6 --
 clang/lib/Basic/Targets/AArch64.cpp           |  6 --
 clang/lib/Basic/Targets/AArch64.h             |  1 -
 clang/lib/Headers/arm_acle.h                  | 24 +-------
 clang/lib/Sema/SemaChecking.cpp               |  1 -
 .../test/CodeGen/aarch64-tme-tcancel-arg.cpp  | 10 ----
 clang/test/CodeGen/aarch64-tme.c              | 36 ------------
 clang/test/Sema/aarch64-tme-errors.c          |  8 ---
 .../Sema/aarch64-tme-tcancel-const-error.c    |  4 --
 .../Sema/aarch64-tme-tcancel-range-error.c    |  4 --
 llvm/include/llvm/IR/IntrinsicsAArch64.td     | 17 ------
 .../llvm/Support/AArch64TargetParser.def      |  1 -
 .../llvm/Support/AArch64TargetParser.h        |  1 -
 llvm/lib/Target/AArch64/AArch64.td            |  3 -
 .../lib/Target/AArch64/AArch64InstrFormats.td | 55 ++-----------------
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   | 29 ++--------
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |  2 -
 llvm/test/CodeGen/AArch64/tme-tcancel.ll      | 16 ------
 llvm/test/CodeGen/AArch64/tme-tcommit.ll      | 16 ------
 llvm/test/CodeGen/AArch64/tme-tstart.ll       | 16 ------
 llvm/test/CodeGen/AArch64/tme-ttest.ll        | 16 ------
 llvm/test/MC/AArch64/tme-error.s              | 47 ----------------
 llvm/test/MC/AArch64/tme.s                    | 24 --------
 llvm/test/MC/Disassembler/AArch64/tme.txt     | 19 -------
 llvm/unittests/Support/TargetParserTest.cpp   |  1 -
 25 files changed, 13 insertions(+), 350 deletions(-)
 delete mode 100644 clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp
 delete mode 100644 clang/test/CodeGen/aarch64-tme.c
 delete mode 100644 clang/test/Sema/aarch64-tme-errors.c
 delete mode 100644 clang/test/Sema/aarch64-tme-tcancel-const-error.c
 delete mode 100644 clang/test/Sema/aarch64-tme-tcancel-range-error.c
 delete mode 100644 llvm/test/CodeGen/AArch64/tme-tcancel.ll
 delete mode 100644 llvm/test/CodeGen/AArch64/tme-tcommit.ll
 delete mode 100644 llvm/test/CodeGen/AArch64/tme-tstart.ll
 delete mode 100644 llvm/test/CodeGen/AArch64/tme-ttest.ll
 delete mode 100644 llvm/test/MC/AArch64/tme-error.s
 delete mode 100644 llvm/test/MC/AArch64/tme.s
 delete mode 100644 llvm/test/MC/Disassembler/AArch64/tme.txt

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index a144979acca6d..7701ad98f4832 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -91,12 +91,6 @@ LANGBUILTIN(__sevl,  "v", "",   ALL_MS_LANGUAGES)
 // Misc
 BUILTIN(__builtin_sponentry, "v*", "c")
 
-// Transactional Memory Extension
-BUILTIN(__builtin_arm_tstart, "WUi", "nj")
-BUILTIN(__builtin_arm_tcommit, "v", "n")
-BUILTIN(__builtin_arm_tcancel, "vWUIi", "nr")
-BUILTIN(__builtin_arm_ttest, "WUi", "nc")
-
 TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 2abca0a660ae4..74ac69ab8946a 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -219,9 +219,6 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
   if (HasMTE)
     Builder.defineMacro("__ARM_FEATURE_MEMORY_TAGGING", "1");
 
-  if (HasTME)
-    Builder.defineMacro("__ARM_FEATURE_TME", "1");
-
   if ((FPU & NeonMode) && HasFP16FML)
     Builder.defineMacro("__ARM_FEATURE_FP16FML", "1");
 
@@ -273,7 +270,6 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
   HasDotProd = false;
   HasFP16FML = false;
   HasMTE = false;
-  HasTME = false;
   ArchKind = llvm::AArch64::ArchKind::ARMV8A;
 
   for (const auto &Feature : Features) {
@@ -305,8 +301,6 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasFP16FML = true;
     if (Feature == "+mte")
       HasMTE = true;
-    if (Feature == "+tme")
-      HasTME = true;
   }
 
   setDataLayout();
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index b6aa07780edda..5833c146003b0 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -35,7 +35,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasDotProd;
   bool HasFP16FML;
   bool HasMTE;
-  bool HasTME;
 
   llvm::AArch64::ArchKind ArchKind;
 
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 0510e6fd809f2..096cc261af2c6 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -613,7 +613,7 @@ __jcvt(double __a) {
 #define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
 #define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
 
-/* Memory Tagging Extensions (MTE) Intrinsics */
+// Memory Tagging Extensions (MTE) Intrinsics
 #if __ARM_FEATURE_MEMORY_TAGGING
 #define __arm_mte_create_random_tag(__ptr, __mask)  __builtin_arm_irg(__ptr, __mask)
 #define __arm_mte_increment_tag(__ptr, __tag_offset)  __builtin_arm_addg(__ptr, __tag_offset)
@@ -623,28 +623,6 @@ __jcvt(double __a) {
 #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
 #endif
 
-/* Transactional Memory Extension (TME) Intrinsics */
-#if __ARM_FEATURE_TME
-
-#define _TMFAILURE_REASON  0x00007fffu
-#define _TMFAILURE_RTRY    0x00008000u
-#define _TMFAILURE_CNCL    0x00010000u
-#define _TMFAILURE_MEM     0x00020000u
-#define _TMFAILURE_IMP     0x00040000u
-#define _TMFAILURE_ERR     0x00080000u
-#define _TMFAILURE_SIZE    0x00100000u
-#define _TMFAILURE_NEST    0x00200000u
-#define _TMFAILURE_DBG     0x00400000u
-#define _TMFAILURE_INT     0x00800000u
-#define _TMFAILURE_TRIVIAL 0x01000000u
-
-#define __tstart()        __builtin_arm_tstart()
-#define __tcommit()       __builtin_arm_tcommit()
-#define __tcancel(__arg)  __builtin_arm_tcancel(__arg)
-#define __ttest()         __builtin_arm_ttest()
-
-#endif /* __ARM_FEATURE_TME */
-
 #if defined(__cplusplus)
 }
 #endif
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 442cbcf1429b6..f9f82cdeef432 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1928,7 +1928,6 @@ bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,
   case AArch64::BI__builtin_arm_dmb:
   case AArch64::BI__builtin_arm_dsb:
   case AArch64::BI__builtin_arm_isb: l = 0; u = 15; break;
-  case AArch64::BI__builtin_arm_tcancel: l = 0; u = 65535; break;
   }
 
   return SemaBuiltinConstantArgRange(TheCall, i, l, u + l);
diff --git a/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp b/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp
deleted file mode 100644
index ae6694703c838..0000000000000
--- a/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s
-
-#define A -1
-constexpr int f() { return 65536; }
-
-void t_cancel() {
-	__builtin_arm_tcancel(f() + A);
-}
-
-// CHECK: call void @llvm.aarch64.tcancel(i64 65535)
diff --git a/clang/test/CodeGen/aarch64-tme.c b/clang/test/CodeGen/aarch64-tme.c
deleted file mode 100644
index 8f90fb8eb8297..0000000000000
--- a/clang/test/CodeGen/aarch64-tme.c
+++ /dev/null
@@ -1,36 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -DUSE_ACLE  -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s
-
-#ifdef USE_ACLE
-#include "arm_acle.h"
-void test_tme_funcs() {
-  __tstart();
-  (void)__ttest();
-  __tcommit();
-  __tcancel(0x789a);
-}
-#else
-void test_tme_funcs() {
-  __builtin_arm_tstart();
-  (void)__builtin_arm_ttest();
-  __builtin_arm_tcommit();
-  __builtin_arm_tcancel(0x789a);
-}
-#endif
-// CHECK: call i64 @llvm.aarch64.tstart()
-// CHECK: call i64 @llvm.aarch64.ttest()
-// CHECK: call void @llvm.aarch64.tcommit()
-// CHECK: call void @llvm.aarch64.tcancel(i64 30874)
-
-// CHECK: declare i64 @llvm.aarch64.tstart() #1
-// CHECK: declare i64 @llvm.aarch64.ttest() #1
-// CHECK: declare void @llvm.aarch64.tcommit() #1
-// CHECK: declare void @llvm.aarch64.tcancel(i64 immarg) #2
-
-#ifdef __ARM_FEATURE_TME
-void arm_feature_tme_defined() {}
-#endif
-// CHECK: define void @arm_feature_tme_defined()
-
-// CHECK: attributes #1 = { nounwind }
-// CHECK: attributes #2 = { noreturn nounwind }
diff --git a/clang/test/Sema/aarch64-tme-errors.c b/clang/test/Sema/aarch64-tme-errors.c
deleted file mode 100644
index 0e9c2a6beec0c..0000000000000
--- a/clang/test/Sema/aarch64-tme-errors.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-eabi -verify %s
-
-#include "arm_acle.h"
-
-void test_no_tme_funcs() {
-  __tstart();         // expected-warning{{implicit declaration of function '__tstart'}}
-  __builtin_tstart(); // expected-error{{use of unknown builtin '__builtin_tstart'}}
-}
diff --git a/clang/test/Sema/aarch64-tme-tcancel-const-error.c b/clang/test/Sema/aarch64-tme-tcancel-const-error.c
deleted file mode 100644
index f97ece59b660b..0000000000000
--- a/clang/test/Sema/aarch64-tme-tcancel-const-error.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -verify %s
-void t_cancel(unsigned short u) {
-  __builtin_arm_tcancel(u); // expected-error{{argument to '__builtin_arm_tcancel' must be a constant integer}}
-}
diff --git a/clang/test/Sema/aarch64-tme-tcancel-range-error.c b/clang/test/Sema/aarch64-tme-tcancel-range-error.c
deleted file mode 100644
index c61ec90dfa501..0000000000000
--- a/clang/test/Sema/aarch64-tme-tcancel-range-error.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -verify %s
-void t_cancel() {
-  __builtin_arm_tcancel(0x12345u); // expected-error{{argument value 74565 is outside the valid range [0, 65535]}}
-}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ceec212b66303..7616d6a90c1bc 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -703,20 +703,3 @@ def int_aarch64_stg   : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
 def int_aarch64_subp :  Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
     [IntrNoMem]>;
 }
-
-// Transactional Memory Extension (TME) Intrinsics
-let TargetPrefix = "aarch64" in {
-def int_aarch64_tstart  : GCCBuiltin<"__builtin_arm_tstart">,
-                         Intrinsic<[llvm_i64_ty]>;
-
-def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>;
-
-def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">,
-                          Intrinsic<[], [llvm_i64_ty],
-                                    [ImmArg<0>, IntrNoMem, IntrHasSideEffects,
-                                     IntrNoReturn]>;
-
-def int_aarch64_ttest   : GCCBuiltin<"__builtin_arm_ttest">,
-                          Intrinsic<[llvm_i64_ty], [],
-                                    [IntrNoMem, IntrHasSideEffects]>;
-}
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index fd21e3615b71e..e152f383b3ec0 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -79,7 +79,6 @@ AARCH64_ARCH_EXT_NAME("memtag",    AArch64::AEK_MTE,      "+mte",   "-mte")
 AARCH64_ARCH_EXT_NAME("ssbs",      AArch64::AEK_SSBS,     "+ssbs",  "-ssbs")
 AARCH64_ARCH_EXT_NAME("sb",        AArch64::AEK_SB,       "+sb",    "-sb")
 AARCH64_ARCH_EXT_NAME("predres",   AArch64::AEK_PREDRES,  "+predres", "-predres")
-AARCH64_ARCH_EXT_NAME("tme",       AArch64::AEK_TME,      "+tme",   "-tme")
 #undef AARCH64_ARCH_EXT_NAME
 
 #ifndef AARCH64_CPU_NAME
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h
index 564f831b07069..965d38535e747 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.h
+++ b/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -54,7 +54,6 @@ enum ArchExtKind : unsigned {
   AEK_SVE2SM4 =     1 << 25,
   AEK_SVE2SHA3 =    1 << 26,
   AEK_BITPERM =     1 << 27,
-  AEK_TME =         1 << 28,
 };
 
 enum class ArchKind {
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index fcd5818727f12..e39c6995e3673 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -345,9 +345,6 @@ def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen",
 def FeatureMTE : SubtargetFeature<"mte", "HasMTE",
     "true", "Enable Memory Tagging Extension" >;
 
-def FeatureTME : SubtargetFeature<"tme", "HasTME",
-    "true", "Enable Transactional Memory Extension" >;
-
 //===----------------------------------------------------------------------===//
 // Architectures.
 //
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 2af5726fc4f5e..74fa5ef713d9e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -714,15 +714,12 @@ def logical_imm64_not : Operand<i64> {
   let ParserMatchClass = LogicalImm64NotOperand;
 }
 
-// iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
-let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
-def i32_imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+// imm0_65535 predicate - True if the immediate is in the range [0,65535].
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
   return ((uint32_t)Imm) < 65536;
-}]>;
-
-def i64_imm0_65535 : Operand<i64>, ImmLeaf<i64, [{
-  return ((uint64_t)Imm) < 65536;
-}]>;
+}]> {
+  let ParserMatchClass = AsmImmRange<0, 65535>;
+  let PrintMethod = "printImmHex";
 }
 
 // imm0_255 predicate - True if the immediate is in the range [0,255].
@@ -1085,46 +1082,6 @@ class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
   let Inst{4-0} = Rt;
 }
 
-// System instructions for transactional memory extension
-class TMBaseSystemI<bit L, bits<4> CRm, bits<3> op2, dag oops, dag iops,
-                    string asm, string operands, list<dag> pattern>
-    : BaseSystemI<L, oops, iops, asm, operands, pattern>,
-      Sched<[WriteSys]> {
-  let Inst{20-12} = 0b000110011;
-  let Inst{11-8} = CRm;
-  let Inst{7-5} = op2;
-  let DecoderMethod = "";
-
-  let mayLoad = 1;
-  let mayStore = 1;
-}
-
-// System instructions for transactional memory - single input operand
-class TMSystemI<bits<4> CRm, string asm, list<dag> pattern>
-    : TMBaseSystemI<0b1, CRm, 0b011,
-                    (outs GPR64:$Rt), (ins), asm, "\t$Rt", pattern> {
-  bits<5> Rt;
-  let Inst{4-0} = Rt;
-}
-
-// System instructions for transactional memory - no operand
-class TMSystemINoOperand<bits<4> CRm, string asm, list<dag> pattern>
-    : TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> {
-  let Inst{4-0} = 0b11111;
-}
-
-// System instructions for exit from transactions
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class TMSystemException<bits<3> op1, string asm, list<dag> pattern>
-    : I<(outs), (ins i64_imm0_65535:$imm), asm, "\t$imm", "", pattern>,
-      Sched<[WriteSys]> {
-  bits<16> imm;
-  let Inst{31-24} = 0b11010100;
-  let Inst{23-21} = op1;
-  let Inst{20-5}  = imm;
-  let Inst{4-0}   = 0b00000;
-}
-
 // Hint instructions that take both a CRm and a 3-bit immediate.
 // NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
 // model patterns with sufficiently fine granularity
@@ -4129,7 +4086,7 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
 
 let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
 class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
-    : I<(outs), (ins i32_imm0_65535:$imm), asm, "\t$imm", "", []>,
+    : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>,
       Sched<[WriteSys]> {
   bits<16> imm;
   let Inst{31-24} = 0b11010100;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 74f07f569a518..897b3ebb3847f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -133,8 +133,6 @@ def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                        AssemblerPredicate<"FeatureBranchTargetId", "bti">;
 def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                        AssemblerPredicate<"FeatureMTE", "mte">;
-def HasTME           : Predicate<"Subtarget->hasTME()">,
-                       AssemblerPredicate<"FeatureTME", "tme">;
 def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
 def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
 def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
@@ -800,21 +798,6 @@ def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                 (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                  sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
 
-
-let Predicates = [HasTME] in {
-
-def TSTART : TMSystemI<0b0000, "tstart", [(set GPR64:$Rt, (int_aarch64_tstart))]>;
-
-def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
-
-let mayLoad = 0, mayStore = 0 in {
-def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]>;
-def TCANCEL : TMSystemException<0b011, "tcancel", [(int_aarch64_tcancel i64_imm0_65535:$imm)]> {
-  let isBarrier = 1;
-}
-}
-} // HasTME
-
 //===----------------------------------------------------------------------===//
 // Move immediate instructions.
 //===----------------------------------------------------------------------===//
@@ -826,12 +809,12 @@ let PostEncoderMethod = "fixMOVZ" in
 defm MOVZ : MoveImmediate<0b10, "movz">;
 
 // First group of aliases covers an implicit "lsl #0".
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>;
+def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
 
 // Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
 def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index ce829795309c2..0c84cfb8329a6 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -134,7 +134,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   bool HasBTI = false;
   bool HasRandGen = false;
   bool HasMTE = false;
-  bool HasTME = false;
 
   // Arm SVE2 extensions
   bool HasSVE2AES = false;
@@ -381,7 +380,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   bool hasBTI() const { return HasBTI; }
   bool hasRandGen() const { return HasRandGen; }
   bool hasMTE() const { return HasMTE; }
-  bool hasTME() const { return HasTME; }
   // Arm SVE2 extensions
   bool hasSVE2AES() const { return HasSVE2AES; }
   bool hasSVE2SM4() const { return HasSVE2SM4; }
diff --git a/llvm/test/CodeGen/AArch64/tme-tcancel.ll b/llvm/test/CodeGen/AArch64/tme-tcancel.ll
deleted file mode 100644
index f4fb7b665de16..0000000000000
--- a/llvm/test/CodeGen/AArch64/tme-tcancel.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc %s -o - | FileCheck %s
-
-target triple = "aarch64-unknown-unknown-eabi"
-
-define void @test_tcancel() #0 {
-  tail call void @llvm.aarch64.tcancel(i64 0) #1
-  unreachable
-}
-
-declare void @llvm.aarch64.tcancel(i64 immarg) #1
-
-attributes #0 = { "target-features"="+tme" }
-attributes #1 = { nounwind noreturn }
-
-; CHECK-LABEL: test_tcancel
-; CHECK: tcancel
diff --git a/llvm/test/CodeGen/AArch64/tme-tcommit.ll b/llvm/test/CodeGen/AArch64/tme-tcommit.ll
deleted file mode 100644
index cd85a3e5bd8ec..0000000000000
--- a/llvm/test/CodeGen/AArch64/tme-tcommit.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc %s -o - | FileCheck %s
-
-target triple = "aarch64-unknown-unknown-eabi"
-
-define void @test_tcommit() #0 {
-  tail call void @llvm.aarch64.tcommit()
-  ret void
-}
-
-declare void @llvm.aarch64.tcommit() #1
-
-attributes #0 = { "target-features"="+tme" }
-attributes #1 = { nounwind }
-
-; CHECK-LABEL: test_tcommit
-; CHECK: tcommit
diff --git a/llvm/test/CodeGen/AArch64/tme-tstart.ll b/llvm/test/CodeGen/AArch64/tme-tstart.ll
deleted file mode 100644
index c761842e7980d..0000000000000
--- a/llvm/test/CodeGen/AArch64/tme-tstart.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc %s -o - | FileCheck %s
-
-target triple = "aarch64-unknown-unknown-eabi"
-
-define i64 @test_tstart() #0 {
-  %r = tail call i64 @llvm.aarch64.tstart()
-  ret i64 %r
-}
-
-declare i64 @llvm.aarch64.tstart() #1
-
-attributes #0 = { "target-features"="+tme" }
-attributes #1 = { nounwind }
-
-; CHECK-LABEL: test_tstart
-; CHECK: tstart x
diff --git a/llvm/test/CodeGen/AArch64/tme-ttest.ll b/llvm/test/CodeGen/AArch64/tme-ttest.ll
deleted file mode 100644
index 597821081354f..0000000000000
--- a/llvm/test/CodeGen/AArch64/tme-ttest.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc %s -o - | FileCheck %s
-
-target triple = "aarch64-unknown-unknown-eabi"
-
-define i64 @test_ttest() #0 {
-  %r = tail call i64 @llvm.aarch64.ttest()
-  ret i64 %r
-}
-
-declare i64 @llvm.aarch64.ttest() #1
-
-attributes #0 = { "target-features"="+tme" }
-attributes #1 = { nounwind }
-
-; CHECK-LABEL: test_ttest
-; CHECK: ttest x
diff --git a/llvm/test/MC/AArch64/tme-error.s b/llvm/test/MC/AArch64/tme-error.s
deleted file mode 100644
index f91f58fa3ef25..0000000000000
--- a/llvm/test/MC/AArch64/tme-error.s
+++ /dev/null
@@ -1,47 +0,0 @@
-// Tests for transactional memory extension instructions
-// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+tme < %s 2>&1   | FileCheck %s
-
-tstart
-// CHECK: error: too few operands for instruction
-// CHECK-NEXT: tstart
-tstart  x4, x5
-// CHECK: error: invalid operand for instruction
-// CHECK-NEXT: tstart x4, x5
-tstart  x4, #1
-// CHECK: error: invalid operand for instruction
-// CHECK-NEXT: tstart x4, #1
-tstart  sp
-// CHECK: error: invalid operand for instruction
-// CHECK-NEXT: tstart sp
-
-ttest
-// CHECK: error: too few operands for instruction
-// CHECK-NEXT: ttest
-ttest  x4, x5
-// CHECK: error: invalid operand for instruction
-// CHECK-NEXT: ttest x4, x5
-ttest  x4, #1
-// CHECK: error: invalid operand for instruction
-// CHECK-NEXT: ttest x4, #1
-ttest  sp
-// CHECK: error: invalid operand for instruction
-// CHECK-NEXT: ttest sp
-
-tcommit  x4
-// CHECK: error: invalid operand for instruction
-// CHECK-NEXT: tcommit x4
-tcommit  sp
-// CHECK: error: invalid operand for instruction
-// CHECK-NEXT: tcommit sp
-
-
-tcancel
-// CHECK: error: too few operands for instruction
-// CHECK-NEXT tcancel
-tcancel x0
-// CHECK: error: immediate must be an integer in range [0, 65535]
-// CHECK-NEXT tcancel
-tcancel #65536
-// CHECK: error: immediate must be an integer in range [0, 65535]
-// CHECK-NEXT: tcancel #65536
-
diff --git a/llvm/test/MC/AArch64/tme.s b/llvm/test/MC/AArch64/tme.s
deleted file mode 100644
index cd47274127649..0000000000000
--- a/llvm/test/MC/AArch64/tme.s
+++ /dev/null
@@ -1,24 +0,0 @@
-// Tests for transaction memory extension instructions
-//
-// RUN:     llvm-mc -triple aarch64 -show-encoding -mattr=+tme   < %s      | FileCheck %s
-// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=-tme   < %s 2>&1 | FileCheck %s --check-prefix=NOTME
-
-tstart x3
-ttest  x4
-tcommit
-tcancel #0x1234
-
-// CHECK: tstart x3         // encoding: [0x63,0x30,0x23,0xd5]
-// CHECK: ttest x4          // encoding: [0x64,0x31,0x23,0xd5]
-// CHECK: tcommit           // encoding: [0x7f,0x30,0x03,0xd5]
-// CHECK: tcancel #0x1234   // encoding: [0x80,0x46,0x62,0xd4]
-
-
-// NOTME: instruction requires: tme
-// NOTME-NEXT: tstart x3
-// NOTME: instruction requires: tme
-// NOTME-NEXT: ttest  x4
-// NOTME: instruction requires: tme
-// NOTME-NEXT: tcommit
-// NOTME: instruction requires: tme
-// NOTME-NEXT: tcancel #0x1234
diff --git a/llvm/test/MC/Disassembler/AArch64/tme.txt b/llvm/test/MC/Disassembler/AArch64/tme.txt
deleted file mode 100644
index f250b33e0e1df..0000000000000
--- a/llvm/test/MC/Disassembler/AArch64/tme.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-# Tests for transaction memory extension instructions
-# RUN:     llvm-mc -triple=aarch64 -mattr=+tme   -disassemble < %s      | FileCheck %s
-# RUN: not llvm-mc -triple=aarch64 -mattr=-tme   -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOTME
-
-[0x63,0x30,0x23,0xd5]
-[0x64,0x31,0x23,0xd5]
-[0x7f,0x30,0x03,0xd5]
-[0x80,0x46,0x62,0xd4]
-
-# CHECK: tstart x3
-# CHECK: ttest  x4
-# CHECK: tcommit
-# CHECK: tcancel #0x1234
-
-# NOTEME: mrs
-# NOTEME-NEXT: mrs
-# NOTEME-NEXT: msr
-# NOTME:      warning: invalid instruction encoding
-# NOTME-NEXT: [0x80,0x46,0x62,0xd4]
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index 5ef8f2e4b5500..34c7a8a4fd1c8 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -1119,7 +1119,6 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
                               {"rcpc", "norcpc", "+rcpc", "-rcpc" },
                               {"rng", "norng", "+rand", "-rand"},
                               {"memtag", "nomemtag", "+mte", "-mte"},
-                              {"tme", "notme", "+tme", "-tme"},
                               {"ssbs", "nossbs", "+ssbs", "-ssbs"},
                               {"sb", "nosb", "+sb", "-sb"},
                               {"predres", "nopredres", "+predres", "-predres"}

From f90d3dff6edc906766cfe3196d6ac1b6d76e9c5a Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 17 Jul 2019 17:56:57 +0000
Subject: [PATCH 374/451] [dotest] Disable color while testing.

Disable colors so we don't risk having unexpected ANSI codes in the test
output. Currently, the behavior of a test can change depending on
whether it's run under a color-supporting terminal, or under a dummy
terminal, for example when using lit or multiprocessing.

llvm-svn: 366356
---
 lldb/packages/Python/lldbsuite/test/lldbtest.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py
index 0fbdfd82f53e2..35f0c76a97ae1 100644
--- a/lldb/packages/Python/lldbsuite/test/lldbtest.py
+++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py
@@ -1869,6 +1869,9 @@ def setUp(self):
         # differ in the debug info, which is not being hashed.
         self.runCmd('settings set symbols.enable-external-lookup false')
 
+        # Disable color.
+        self.runCmd("settings set use-color false")
+
         # Make sure that a sanitizer LLDB's environment doesn't get passed on.
         if 'DYLD_LIBRARY_PATH' in os.environ:
             self.runCmd('settings set target.env-vars DYLD_LIBRARY_PATH=')

From 48f5a43bcc3b128aca54b99b565595a5fa6ad166 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@hotmail.com>
Date: Wed, 17 Jul 2019 18:03:39 +0000
Subject: [PATCH 375/451] [OPENMP]Fix PR42632: crash on the analysis of the
 OpenMP constructs.

Fixed processing of the CapturedStmt children to fix the crash of the
OpenMP constructs during analysis.

llvm-svn: 366357
---
 clang/lib/AST/ParentMap.cpp              | 12 ++++++++++++
 clang/test/Analysis/openmp-unsupported.c |  4 ++++
 2 files changed, 16 insertions(+)

diff --git a/clang/lib/AST/ParentMap.cpp b/clang/lib/AST/ParentMap.cpp
index e09b5bbe75f32..2ff5c9d8aeb57 100644
--- a/clang/lib/AST/ParentMap.cpp
+++ b/clang/lib/AST/ParentMap.cpp
@@ -83,6 +83,18 @@ static void BuildParentMap(MapTy& M, Stmt* S,
     }
     break;
   }
+  case Stmt::CapturedStmtClass:
+    for (Stmt *SubStmt : S->children()) {
+      if (SubStmt) {
+        M[SubStmt] = S;
+        BuildParentMap(M, SubStmt, OVMode);
+      }
+    }
+    if (Stmt *SubStmt = cast<CapturedStmt>(S)->getCapturedStmt()) {
+      M[SubStmt] = S;
+      BuildParentMap(M, SubStmt, OVMode);
+    }
+    break;
   default:
     for (Stmt *SubStmt : S->children()) {
       if (SubStmt) {
diff --git a/clang/test/Analysis/openmp-unsupported.c b/clang/test/Analysis/openmp-unsupported.c
index 7e363eecbaa08..b2e1a1b021797 100644
--- a/clang/test/Analysis/openmp-unsupported.c
+++ b/clang/test/Analysis/openmp-unsupported.c
@@ -4,4 +4,8 @@
 void openmp_parallel_crash_test() {
 #pragma omp parallel
   ;
+#pragma omp parallel for
+  for (int i = 0; i < 8; ++i)
+    for (int j = 0, k = 0; j < 8; ++j)
+      ;
 }

From accad76c1425492e700178f557573e661d0c0afa Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 17 Jul 2019 18:43:36 +0000
Subject: [PATCH 376/451] [lld][WebAssembly] Fix handling of comdat functions
 in init array.

When hidden symbols are discarded by comdat rules we still want to
create a local defined symbol, since `Symbol::isDiscarded()` relies
on being able to check `getChunk->discarded`.

This is a followup on rL362769. The comdats.ll test was previously GC'ing
the `__wasm_call_ctors` functions so `do_init` was not actually being
included in the link.  Once that function was included it triggered the
crash bug that this change addresses.

Fixes: https://github.com/emscripten-core/emscripten/issues/8981

Differential Revision: https://reviews.llvm.org/D64872

llvm-svn: 366358
---
 lld/test/wasm/Inputs/comdat1.ll |  4 ++--
 lld/test/wasm/Inputs/comdat2.ll |  4 ++--
 lld/test/wasm/comdats.ll        | 24 ++++++++++++++++--------
 lld/wasm/InputFiles.cpp         | 10 ++++------
 lld/wasm/Writer.cpp             |  1 +
 5 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/lld/test/wasm/Inputs/comdat1.ll b/lld/test/wasm/Inputs/comdat1.ll
index 9e5bd1a6de9c0..34f7f161d10cd 100644
--- a/lld/test/wasm/Inputs/comdat1.ll
+++ b/lld/test/wasm/Inputs/comdat1.ll
@@ -12,8 +12,8 @@ define internal void @do_init() comdat($foo) {
   ret void
 }
 
-@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void
-()*, i8* } { i32 65535, void ()* @do_init, i8* null }]
+%0 = type { i32, void ()*, i8* }
+@llvm.global_ctors = appending global [1 x %0 ] [%0 { i32 65535, void ()* @do_init, i8* null }]
 
 ; Everything above this is part of the `foo` comdat group
 
diff --git a/lld/test/wasm/Inputs/comdat2.ll b/lld/test/wasm/Inputs/comdat2.ll
index 0618d55aa2f5d..b1478ad7c3210 100644
--- a/lld/test/wasm/Inputs/comdat2.ll
+++ b/lld/test/wasm/Inputs/comdat2.ll
@@ -12,8 +12,8 @@ define internal void @do_init() comdat($foo) {
   ret void
 }
 
-@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void
-()*, i8* } { i32 65535, void ()* @do_init, i8* null }]
+%0 = type { i32, void ()*, i8* }
+@llvm.global_ctors = appending global [1 x %0] [ %0 { i32 65535, void ()* @do_init, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @constantData, i32 0, i32 0) }]
 
 ; Everything above this is part of the `foo` comdat group
 
diff --git a/lld/test/wasm/comdats.ll b/lld/test/wasm/comdats.ll
index 602cd313afe87..8fc301e9a10e0 100644
--- a/lld/test/wasm/comdats.ll
+++ b/lld/test/wasm/comdats.ll
@@ -6,10 +6,12 @@
 
 target triple = "wasm32-unknown-unknown"
 
+declare void @__wasm_call_ctors()
 declare i32 @comdatFn()
 
 define void @_start() {
 entry:
+  call void @__wasm_call_ctors()
   %call = call i32 @comdatFn()
   ret void
 }
@@ -35,38 +37,44 @@ entry:
 ; CHECK-NEXT:        Index:           0
 ; CHECK-NEXT:      - Name:            _start
 ; CHECK-NEXT:        Kind:            FUNCTION
-; CHECK-NEXT:        Index:           0
+; CHECK-NEXT:        Index:           1
 ; CHECK-NEXT:      - Name:            comdatFn
 ; CHECK-NEXT:        Kind:            FUNCTION
-; CHECK-NEXT:        Index:           1
+; CHECK-NEXT:        Index:           2
 ; CHECK-NEXT:      - Name:            constantData
 ; CHECK-NEXT:        Kind:            GLOBAL
 ; CHECK-NEXT:        Index:           1
 ; CHECK-NEXT:      - Name:            callComdatFn1
 ; CHECK-NEXT:        Kind:            FUNCTION
-; CHECK-NEXT:        Index:           2
+; CHECK-NEXT:        Index:           4
 ; CHECK-NEXT:      - Name:            callComdatFn2
 ; CHECK-NEXT:        Kind:            FUNCTION
-; CHECK-NEXT:        Index:           3
+; CHECK-NEXT:        Index:           5
 ; CHECK-NEXT:  - Type:            ELEM
 ; CHECK-NEXT:    Segments:
 ; CHECK-NEXT:      - Offset:
 ; CHECK-NEXT:          Opcode:          I32_CONST
 ; CHECK-NEXT:          Value:           1
-; CHECK-NEXT:        Functions:       [ 1 ]
+; CHECK-NEXT:        Functions:       [ 2 ]
 ; CHECK-NEXT:  - Type:            CODE
 ; CHECK-NEXT:    Functions:
 ; CHECK-NEXT:      - Index:           0
 ; CHECK-NEXT:        Locals:
-; CHECK-NEXT:        Body:            1081808080001A0B
+; CHECK-NEXT:        Body:            10030B
 ; CHECK-NEXT:      - Index:           1
 ; CHECK-NEXT:        Locals:
-; CHECK-NEXT:        Body:            4180888080000B
+; CHECK-NEXT:        Body:            1080808080001082808080001A0B
 ; CHECK-NEXT:      - Index:           2
 ; CHECK-NEXT:        Locals:
-; CHECK-NEXT:        Body:            4181808080000B
+; CHECK-NEXT:        Body:            4180888080000B
 ; CHECK-NEXT:      - Index:           3
 ; CHECK-NEXT:        Locals:
+; CHECK-NEXT:        Body:            0B
+; CHECK-NEXT:      - Index:           4
+; CHECK-NEXT:        Locals:
+; CHECK-NEXT:        Body:            4181808080000B
+; CHECK-NEXT:      - Index:           5
+; CHECK-NEXT:        Locals:
 ; CHECK-NEXT:        Body:            4181808080000B
 ; CHECK-NEXT:  - Type:            DATA
 ; CHECK-NEXT:    Segments:
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index b4945469f931c..33ae3325c7472 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -380,22 +380,20 @@ Symbol *ObjFile::createDefined(const WasmSymbol &sym) {
   case WASM_SYMBOL_TYPE_FUNCTION: {
     InputFunction *func =
         functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()];
-    if (func->discarded)
-      return nullptr;
     if (sym.isBindingLocal())
       return make<DefinedFunction>(name, flags, this, func);
+    if (func->discarded)
+      return nullptr;
     return symtab->addDefinedFunction(name, flags, this, func);
   }
   case WASM_SYMBOL_TYPE_DATA: {
     InputSegment *seg = segments[sym.Info.DataRef.Segment];
-    if (seg->discarded)
-      return nullptr;
-
     uint32_t offset = sym.Info.DataRef.Offset;
     uint32_t size = sym.Info.DataRef.Size;
-
     if (sym.isBindingLocal())
       return make<DefinedData>(name, flags, this, seg, offset, size);
+    if (seg->discarded)
+      return nullptr;
     return symtab->addDefinedData(name, flags, this, seg, offset, size);
   }
   case WASM_SYMBOL_TYPE_GLOBAL: {
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 23a63edee7cca..4ad91ab111713 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -821,6 +821,7 @@ void Writer::calculateInitFunctions() {
       assert(sym->isLive());
       if (*sym->signature != WasmSignature{{}, {}})
         error("invalid signature for init func: " + toString(*sym));
+      LLVM_DEBUG(dbgs() << "initFunctions: " << toString(*sym) << "\n");
       initFunctions.emplace_back(WasmInitEntry{sym, f.Priority});
     }
   }

From 66412df94f8e6596664471ef9672655fc62754ce Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne@apple.com>
Date: Wed, 17 Jul 2019 18:54:29 +0000
Subject: [PATCH 377/451] [libc++] XFAIL a test that does not behave properly
 on older Clang

rdar://53015486

llvm-svn: 366359
---
 .../language.support/support.dynamic/libcpp_deallocate.sh.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
index 00e70d6025e51..af214eaab1bf7 100644
--- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
+++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
@@ -30,6 +30,10 @@
 // XFAIL: availability=macosx10.8
 // XFAIL: availability=macosx10.7
 
+// AppleClang < 10 incorrectly warns that aligned allocation is not supported
+// even when it is supported.
+// XFAIL: apple-clang-9
+
 // XFAIL: sanitizer-new-delete, ubsan
 
 // GCC doesn't support the aligned-allocation flags.

From d752f5e95309c28e1b0c174a5a770dc5d0244540 Mon Sep 17 00:00:00 2001
From: Evgeniy Stepanov <eugeni.stepanov@gmail.com>
Date: Wed, 17 Jul 2019 19:24:02 +0000
Subject: [PATCH 378/451] Basic codegen for MTE stack tagging.

Implement IR intrinsics for stack tagging. Generated code is very
unoptimized for now.

Two special intrinsics, llvm.aarch64.irg.sp and llvm.aarch64.tagp are
used to implement a tagged stack frame pointer in a virtual register.

Differential Revision: https://reviews.llvm.org/D64172

llvm-svn: 366360
---
 .../llvm/CodeGen/SelectionDAGTargetInfo.h     |   8 +
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  30 ++++
 llvm/lib/Analysis/ValueTracking.cpp           |   3 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  13 ++
 .../AArch64/AArch64ExpandPseudoInsts.cpp      | 102 +++++++++++++
 .../Target/AArch64/AArch64FrameLowering.cpp   |   4 +
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |  60 +++++++-
 .../Target/AArch64/AArch64ISelLowering.cpp    |   4 +
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   8 +-
 .../lib/Target/AArch64/AArch64InstrFormats.td |   4 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |  17 +++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  45 ++++++
 .../AArch64/AArch64MachineFunctionInfo.h      |  13 ++
 .../Target/AArch64/AArch64RegisterInfo.cpp    |  15 +-
 .../AArch64/AArch64SelectionDAGInfo.cpp       |  88 +++++++++++
 .../Target/AArch64/AArch64SelectionDAGInfo.h  |   4 +
 .../Analysis/ValueTracking/aarch64.irg.ll     |  16 ++
 llvm/test/CodeGen/AArch64/irg.ll              |  42 ++++++
 llvm/test/CodeGen/AArch64/irg_sp_tagp.ll      |  93 ++++++++++++
 llvm/test/CodeGen/AArch64/settag.ll           | 138 ++++++++++++++++++
 llvm/test/CodeGen/AArch64/stgp.ll             |  78 ++++++++++
 llvm/test/CodeGen/AArch64/tagp.ll             |  41 ++++++
 22 files changed, 818 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/irg.ll
 create mode 100644 llvm/test/CodeGen/AArch64/irg_sp_tagp.ll
 create mode 100644 llvm/test/CodeGen/AArch64/settag.ll
 create mode 100644 llvm/test/CodeGen/AArch64/stgp.ll
 create mode 100644 llvm/test/CodeGen/AArch64/tagp.ll

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
index 7c9f57beb660b..6f6a9a5ae2695 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -147,6 +147,14 @@ class SelectionDAGTargetInfo {
     return std::make_pair(SDValue(), SDValue());
   }
 
+  virtual SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                          SDValue Chain, SDValue Addr,
+                                          SDValue Size,
+                                          MachinePointerInfo DstPtrInfo,
+                                          bool ZeroData) const {
+    return SDValue();
+  }
+
   // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
   // than FMUL and ADD is delegated to the machine combiner.
   virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const {
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 7616d6a90c1bc..832aca4fd30fd 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -702,4 +702,34 @@ def int_aarch64_stg   : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
     [IntrWriteMem]>;
 def int_aarch64_subp :  Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
     [IntrNoMem]>;
+
+// The following are codegen-only intrinsics for stack instrumentation.
+
+// Generate a randomly tagged stack base pointer.
+def int_aarch64_irg_sp   : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty],
+    [IntrInaccessibleMemOnly]>;
+
+// Transfer pointer tag with offset.
+// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
+// * address is the address in ptr0
+// * tag is a function of (tag in baseptr, tag_offset).
+// Address bits in baseptr and tag bits in ptr0 are ignored.
+// When offset between ptr0 and baseptr is a compile time constant, this can be emitted as
+//   ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset
+// It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp.
+def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
+    [IntrNoMem, ImmArg<2>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument.
+def int_aarch64_settag  : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument,
+// and set memory contents to zero.
+def int_aarch64_settag_zero  : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for a 16-aligned, 16-sized memory region, and store a pair of 8-byte values.
+def int_aarch64_stgp  : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
 }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 49a328bbc9ba3..c70906dcc6295 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3666,7 +3666,8 @@ bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
     const CallBase *Call) {
   return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
          Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
-         Call->getIntrinsicID() == Intrinsic::aarch64_irg;
+         Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
+         Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
 }
 
 /// \p PN defines a loop-variant pointer to an object.  Check if the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 61ec29261c396..e818dd27c05e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6805,6 +6805,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
     // delete it now.
     return;
+
+  case Intrinsic::aarch64_settag:
+  case Intrinsic::aarch64_settag_zero: {
+    const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+    bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
+    SDValue Val = TSI.EmitTargetCodeForSetTag(
+        DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+        getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
+        ZeroMemory);
+    DAG.setRoot(Val);
+    setValue(&I, Val);
+    return;
+  }
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 68076d2ca925f..210c10eb18421 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -15,6 +15,7 @@
 
 #include "AArch64ExpandImm.h"
 #include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
@@ -74,6 +75,9 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           MachineBasicBlock::iterator &NextMBBI);
+  bool expandSetTagLoop(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI,
+                        MachineBasicBlock::iterator &NextMBBI);
 };
 
 } // end anonymous namespace
@@ -336,6 +340,64 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
   return true;
 }
 
+bool AArch64ExpandPseudo::expandSetTagLoop(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  Register SizeReg = MI.getOperand(2).getReg();
+  Register AddressReg = MI.getOperand(3).getReg();
+
+  MachineFunction *MF = MBB.getParent();
+
+  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
+  const unsigned OpCode =
+      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
+
+  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoopBB);
+  MF->insert(++LoopBB->getIterator(), DoneBB);
+
+  BuildMI(LoopBB, DL, TII->get(OpCode))
+      .addDef(AddressReg)
+      .addReg(AddressReg)
+      .addReg(AddressReg)
+      .addImm(2)
+      .cloneMemRefs(MI)
+      .setMIFlags(MI.getFlags());
+  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
+      .addDef(SizeReg)
+      .addReg(SizeReg)
+      .addImm(16 * 2)
+      .addImm(0);
+  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
+
+  LoopBB->addSuccessor(LoopBB);
+  LoopBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+
+  MBB.addSuccessor(LoopBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  // Recompute liveness bottom up.
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+  computeAndAddLiveIns(LiveRegs, *LoopBB);
+  // Do an extra pass in the loop to get the loop carried dependencies right.
+  // FIXME: is this necessary?
+  LoopBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *LoopBB);
+  DoneBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true.  Otherwise return false.
 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -569,6 +631,46 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     MI.eraseFromParent();
     return true;
    }
+   case AArch64::IRGstack: {
+     MachineFunction &MF = *MBB.getParent();
+     const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+     const AArch64FrameLowering *TFI =
+         MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
+
+     // IRG does not allow immediate offset. getTaggedBasePointerOffset should
+     // almost always point to SP-after-prologue; if not, emit a longer
+     // instruction sequence.
+     int BaseOffset = -AFI->getTaggedBasePointerOffset();
+     unsigned FrameReg;
+     int FrameRegOffset = TFI->resolveFrameOffsetReference(
+         MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false,
+         /*ForSimm=*/true);
+     Register SrcReg = FrameReg;
+     if (FrameRegOffset != 0) {
+       // Use output register as temporary.
+       SrcReg = MI.getOperand(0).getReg();
+       emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
+                       FrameRegOffset, TII);
+     }
+     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
+         .add(MI.getOperand(0))
+         .addUse(SrcReg)
+         .add(MI.getOperand(2));
+     MI.eraseFromParent();
+     return true;
+   }
+   case AArch64::TAGPstack: {
+     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDG))
+         .add(MI.getOperand(0))
+         .add(MI.getOperand(1))
+         .add(MI.getOperand(2))
+         .add(MI.getOperand(4));
+     MI.eraseFromParent();
+     return true;
+   }
+   case AArch64::STGloop:
+   case AArch64::STZGloop:
+     return expandSetTagLoop(MBB, MBBI, NextMBBI);
   }
   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fed0fc7f62477..8c6e5cbd5c13b 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -842,6 +842,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     return;
 
+  // Set tagged base pointer to the bottom of the stack frame.
+  // Ideally it should match SP value after prologue.
+  AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
   // getStackSize() includes all the locals in its size calculation. We don't
   // include these locals when computing the stack size of a funclet, as they
   // are allocated in the parent's stack frame and accessed via the frame
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 2a911c439083e..cd7e927ac80ca 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -157,6 +157,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   bool tryIndexedLoad(SDNode *N);
 
+  bool trySelectStackSlotTagP(SDNode *N);
+  void SelectTagP(SDNode *N);
+
   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
@@ -703,7 +706,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSigned
     return true;
   }
 
-  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
+  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
   // selected here doesn't support labels/immediates, only base+offset.
   if (CurDAG->isBaseWithConstantOffset(N)) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -2790,6 +2793,58 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   return true;
 }
 
+bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
+  // tagp(FrameIndex, IRGstack, tag_offset):
+  // since the offset between FrameIndex and IRGstack is a compile-time
+  // constant, this can be lowered to a single ADDG instruction.
+  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
+    return false;
+  }
+
+  SDValue IRG_SP = N->getOperand(2);
+  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
+      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
+          Intrinsic::aarch64_irg_sp) {
+    return false;
+  }
+
+  const TargetLowering *TLI = getTargetLowering();
+  SDLoc DL(N);
+  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
+  SDValue FiOp = CurDAG->getTargetFrameIndex(
+      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+
+  SDNode *Out = CurDAG->getMachineNode(
+      AArch64::TAGPstack, DL, MVT::i64,
+      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
+       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+  ReplaceNode(N, Out);
+  return true;
+}
+
+void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
+  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
+         "llvm.aarch64.tagp third argument must be an immediate");
+  if (trySelectStackSlotTagP(N))
+    return;
+  // FIXME: above applies in any case when offset between Op1 and Op2 is a
+  // compile-time constant, not just for stack allocations.
+
+  // General case for unrelated pointers in Op1 and Op2.
+  SDLoc DL(N);
+  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
+                                      {N->getOperand(1), N->getOperand(2)});
+  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
+                                      {SDValue(N1, 0), N->getOperand(2)});
+  SDNode *N3 = CurDAG->getMachineNode(
+      AArch64::ADDG, DL, MVT::i64,
+      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
+       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+  ReplaceNode(N, N3);
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
@@ -3283,6 +3338,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     switch (IntNo) {
     default:
       break;
+    case Intrinsic::aarch64_tagp:
+      SelectTagP(Node);
+      return;
     case Intrinsic::aarch64_neon_tbl2:
       SelectTable(Node, 2,
                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 11ee1a5f9e681..7becc99fb5c79 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1234,6 +1234,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::FRECPS:            return "AArch64ISD::FRECPS";
   case AArch64ISD::FRSQRTE:           return "AArch64ISD::FRSQRTE";
   case AArch64ISD::FRSQRTS:           return "AArch64ISD::FRSQRTS";
+  case AArch64ISD::STG:               return "AArch64ISD::STG";
+  case AArch64ISD::STZG:              return "AArch64ISD::STZG";
+  case AArch64ISD::ST2G:              return "AArch64ISD::ST2G";
+  case AArch64ISD::STZ2G:             return "AArch64ISD::STZ2G";
   }
   return nullptr;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 754caaf519217..4421c31f65c91 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -214,7 +214,13 @@ enum NodeType : unsigned {
   LD4LANEpost,
   ST2LANEpost,
   ST3LANEpost,
-  ST4LANEpost
+  ST4LANEpost,
+
+  STG,
+  STZG,
+  ST2G,
+  STZ2G
+
 };
 
 } // end namespace AArch64ISD
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 74fa5ef713d9e..d619137b55c58 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -4067,12 +4067,12 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
                     (outs), (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
   def PreIndex :
     BaseMemTagStore<opc1, 0b11, insn, "\t$Rt, [$Rn, $offset]!",
-                    "$Rn = $wback,@earlyclobber $wback",
+                    "$Rn = $wback",
                     (outs GPR64sp:$wback),
                     (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
   def PostIndex :
     BaseMemTagStore<opc1, 0b01, insn, "\t$Rt, [$Rn], $offset",
-                    "$Rn = $wback,@earlyclobber $wback",
+                    "$Rn = $wback",
                     (outs GPR64sp:$wback),
                     (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 599a5abd611be..215e96a82d0e8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1772,6 +1772,7 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   case AArch64::STNPWi:
   case AArch64::STNPSi:
   case AArch64::LDG:
+  case AArch64::STGPi:
     return 3;
   case AArch64::ADDG:
   case AArch64::STGOffset:
@@ -2151,6 +2152,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     MaxOffset = 4095;
     break;
   case AArch64::ADDG:
+  case AArch64::TAGPstack:
     Scale = 16;
     Width = 0;
     MinOffset = 0;
@@ -2158,10 +2160,23 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     break;
   case AArch64::LDG:
   case AArch64::STGOffset:
+  case AArch64::STZGOffset:
     Scale = Width = 16;
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+    Scale = 16;
+    Width = 32;
+    MinOffset = -256;
+    MaxOffset = 255;
+    break;
+  case AArch64::STGPi:
+    Scale = Width = 16;
+    MinOffset = -64;
+    MaxOffset = 63;
+    break;
   }
 
   return true;
@@ -3257,6 +3272,8 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   case AArch64::ST1Twov1d:
   case AArch64::ST1Threev1d:
   case AArch64::ST1Fourv1d:
+  case AArch64::IRG:
+  case AArch64::IRGstack:
     return AArch64FrameOffsetCannotUpdate;
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 897b3ebb3847f..eed53f36d5741 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -409,6 +409,12 @@ def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
 def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
 def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
 
+def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
+def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
@@ -1289,6 +1295,15 @@ defm STZG  : MemTagStore<0b01, "stzg">;
 defm ST2G  : MemTagStore<0b10, "st2g">;
 defm STZ2G : MemTagStore<0b11, "stz2g">;
 
+def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STGOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STZGOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (ST2GOffset $Rn, $Rm, $imm)>;
+def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
+          (STZ2GOffset $Rn, $Rm, $imm)>;
+
 defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
 def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
 def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
@@ -1296,6 +1311,36 @@ def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
 def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
           (STGOffset GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
 
+def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
+          (STGPi $Rt, $Rt2, $Rn, $imm)>;
+
+def IRGstack
+    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
+      Sched<[]>;
+def TAGPstack
+    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
+      Sched<[]>;
+
+// Explicit SP in the first operand prevents ShrinkWrap optimization
+// from leaving this instruction out of the stack frame. When IRGstack
+// is transformed into IRG, this operand is replaced with the actual
+// register / expression for the tagged base pointer of the current function.
+def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
+
+// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address.
+// $Rn_wback is one past the end of the range.
+let isCodeGenOnly=1, mayStore=1 in {
+def STGloop
+    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+      Sched<[WriteAdr, WriteST]>;
+
+def STZGloop
+    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+      Sched<[WriteAdr, WriteST]>;
+}
+
 } // Predicates = [HasMTE]
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index f4e810fa454ca..0efeeb272ec1f 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -105,6 +105,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
   SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
+
+  // Offset from SP-at-entry to the tagged base pointer, which is set up to
+  // point to the first (lowest address) tagged stack slot. Zero-initialized so
+  // that getTaggedBasePointerOffset() never reads an indeterminate value.
+  unsigned TaggedBasePointerOffset = 0;
+
 public:
   AArch64FunctionInfo() = default;
 
@@ -224,6 +230,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
     return ForwardedMustTailRegParms;
   }
 
+  unsigned getTaggedBasePointerOffset() const {
+    return TaggedBasePointerOffset;
+  }
+  void setTaggedBasePointerOffset(unsigned Offset) {
+    TaggedBasePointerOffset = Offset;
+  }
+
 private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index c44d77c7263b1..6d5a4e3d2f767 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -468,10 +468,19 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     return;
   }
 
-  // Modify MI as necessary to handle as much of 'Offset' as possible
-  Offset = TFI->resolveFrameIndexReference(
-      MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+  if (MI.getOpcode() == AArch64::TAGPstack) {
+    // TAGPstack must use the virtual frame register in its 3rd operand.
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
+    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+    FrameReg = MI.getOperand(3).getReg();
+    Offset =
+        MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset();
+  } else {
+    Offset = TFI->resolveFrameIndexReference(
+        MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+  }
 
+  // Modify MI as necessary to handle as much of 'Offset' as possible
   if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
     return;
 
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 953d7387f2e05..60dbace03ca6e 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -56,3 +56,91 @@ bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
     CodeGenOpt::Level OptLevel) const {
   return OptLevel >= CodeGenOpt::Aggressive;
 }
+
+static const int kSetTagLoopThreshold = 176;
+
+// Emits straight-line tag stores covering ObjSize bytes starting at Ptr.
+//
+// ObjSize is consumed in 16-byte units: one ST2G (STZ2G when ZeroData is set)
+// for each 32 bytes, followed by a single STG (STZG) for a trailing 16 bytes.
+// Each store gets a memory operand derived from BaseMemOperand at its offset.
+// All stores hang off the incoming Chain; the returned value is a TokenFactor
+// joining their output chains.
+static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Ptr, uint64_t ObjSize,
+                                  const MachineMemOperand *BaseMemOperand,
+                                  bool ZeroData) {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // By default the address value doubles as the tag source.
+  SDValue TagSrc = Ptr;
+  if (Ptr.getOpcode() == ISD::FrameIndex) {
+    // A frame index operand may end up as [SP + offset] => it is fine to use SP
+    // register as the tag source.
+    const int FrameIdx = cast<FrameIndexSDNode>(Ptr)->getIndex();
+    Ptr = DAG.getTargetFrameIndex(FrameIdx, MVT::i64);
+    TagSrc = DAG.getRegister(AArch64::SP, MVT::i64);
+  }
+
+  // Node opcodes for the one-granule and two-granule stores.
+  const unsigned SingleOpc = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
+  const unsigned PairOpc = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;
+
+  // Sizes and positions below are counted in 16-byte granules.
+  const unsigned NumGranules = ObjSize / 16;
+  SmallVector<SDValue, 8> StoreChains;
+  for (unsigned Granule = 0; Granule < NumGranules;) {
+    // Cover two granules per store while at least two remain.
+    const bool IsPair = NumGranules - Granule >= 2;
+    const unsigned Step = IsPair ? 2 : 1;
+    const unsigned ByteOffset = Granule * 16;
+
+    SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, ByteOffset, dl);
+    SDValue St = DAG.getMemIntrinsicNode(
+        IsPair ? PairOpc : SingleOpc, dl, DAG.getVTList(MVT::Other),
+        {Chain, TagSrc, AddrNode},
+        IsPair ? MVT::v4i64 : MVT::v2i64,
+        MF.getMachineMemOperand(BaseMemOperand, ByteOffset, 16 * Step));
+    StoreChains.push_back(St);
+    Granule += Step;
+  }
+
+  // Join the independent stores into a single output chain.
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreChains);
+}
+
+// Lowers settag of a constant Size: unrolled stores, or a loop pseudo.
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
+    SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
+  uint64_t ObjSize = cast<ConstantSDNode>(Size)->getZExtValue();
+  assert(ObjSize % 16 == 0);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
+      DstPtrInfo, MachineMemOperand::MOStore, ObjSize, 16);
+
+  // Compare as uint64_t; an int cast would wrap for sizes of 2GiB and above.
+  if (kSetTagLoopThreshold < 0 || ObjSize < (uint64_t)kSetTagLoopThreshold)
+    return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
+                              ZeroData);
+
+  // Peel one granule so the size given to the loop pseudo is a multiple of 32.
+  if (ObjSize % 32 != 0) {
+    SDNode *St1 = DAG.getMachineNode(
+        ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
+        {MVT::i64, MVT::Other},
+        {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
+    DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
+    ObjSize -= 16;
+    Addr = SDValue(St1, 0);
+    Chain = SDValue(St1, 1);
+  }
+
+  const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
+  SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
+  SDNode *St = DAG.getMachineNode(
+      ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
+  DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
+  return SDValue(St, 2);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 9d386128fdccd..d0967fb973cc3 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -23,6 +23,10 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo {
                                   SDValue Chain, SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align, bool isVolatile,
                                   MachinePointerInfo DstPtrInfo) const override;
+  SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Op1, SDValue Op2,
+                                  MachinePointerInfo DstPtrInfo,
+                                  bool ZeroData) const override;
   bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const override;
 };
 }
diff --git a/llvm/test/Analysis/ValueTracking/aarch64.irg.ll b/llvm/test/Analysis/ValueTracking/aarch64.irg.ll
index 75e198abe8516..3d8b0e22d7885 100644
--- a/llvm/test/Analysis/ValueTracking/aarch64.irg.ll
+++ b/llvm/test/Analysis/ValueTracking/aarch64.irg.ll
@@ -13,6 +13,22 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: define void @checkNonnullTagp(
+define void @checkNonnullTagp(i8* %tag) {
+; CHECK:  %[[p:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %a, i8* %tag, i64 1)
+; CHECK:  %[[p2:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %[[p]], i8* %tag, i64 2)
+; CHECK:  call void @use(i8* nonnull %[[p2]])
+entry:
+  %a = alloca i8, align 8
+
+  %p = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 1)
+  %p2 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
+  call void @use(i8* %p2)
+
+  ret void
+}
+
 declare i8* @llvm.aarch64.irg(i8*, i64)
+declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)
 
 declare void @use(i8*)
diff --git a/llvm/test/CodeGen/AArch64/irg.ll b/llvm/test/CodeGen/AArch64/irg.ll
new file mode 100644
index 0000000000000..31a018e183623
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/irg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define i8* @irg_imm16(i8* %p) {
+entry:
+; CHECK-LABEL: irg_imm16:
+; CHECK: mov w[[R:[0-9]+]], #16
+; CHECK: irg x0, x0, x[[R]]
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 16)
+  ret i8* %q
+}
+
+define i8* @irg_imm0(i8* %p) {
+entry:
+; CHECK-LABEL: irg_imm0:
+; CHECK: irg x0, x0{{$}}
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
+  ret i8* %q
+}
+
+define i8* @irg_reg(i8* %p, i64 %ex) {
+entry:
+; CHECK-LABEL: irg_reg:
+; CHECK: irg x0, x0, x1
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 %ex)
+  ret i8* %q
+}
+
+; irg.sp takes no pointer argument; the address operand of IRG is SP
+define i8* @irg_sp() {
+entry:
+; CHECK-LABEL: irg_sp:
+; CHECK: irg x0, sp{{$}}
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
+  ret i8* %q
+}
+
+declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
+declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
diff --git a/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll b/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll
new file mode 100644
index 0000000000000..1232090fc6b6c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define i8* @small_alloca() {
+entry:
+; CHECK-LABEL: small_alloca:
+; CHECK:      irg  [[R:x[0-9]+]], sp{{$}}
+; CHECK-NEXT: addg x0, [[R]], #0, #1
+; CHECK:      ret
+  %a = alloca i8, align 16
+  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %q, i64 1)
+  ret i8* %q1
+}
+
+; Two large allocas. One's offset overflows addg immediate.
+define void @huge_allocas() {
+entry:
+; CHECK-LABEL: huge_allocas:
+; CHECK:      irg  [[R:x[0-9]+]], sp{{$}}
+; CHECK:      add  [[TMP:x[0-9]+]], [[R]], #3088
+; CHECK:      addg x0, [[TMP]], #1008, #1
+; CHECK:      addg x1, [[R]], #0, #2
+; CHECK:      bl use2
+  %a = alloca i8, i64 4096, align 16
+  %b = alloca i8, i64 4096, align 16
+  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
+  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 2)
+  call void @use2(i8* %a_t, i8* %b_t)
+  ret void
+}
+
+; Realigned stack frame. IRG uses value of SP after realignment,
+; ADDG for the first stack allocation has offset 0.
+define void @realign() {
+entry:
+; CHECK-LABEL: realign:
+; CHECK:      add  x29, sp, #16
+; CHECK:      and  sp, x{{[0-9]*}}, #0xffffffffffffffc0
+; CHECK:      irg  [[R:x[0-9]+]], sp{{$}}
+; CHECK:      addg x0, [[R]], #0, #1
+; CHECK:      bl use
+  %a = alloca i8, i64 4096, align 64
+  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
+  call void @use(i8* %a_t)
+  ret void
+}
+
+; With a dynamic alloca, IRG has to use FP with non-zero offset.
+; ADDG offset for the single static alloca is still zero.
+define void @dynamic_alloca(i64 %size) {
+entry:
+; CHECK-LABEL: dynamic_alloca:
+; CHECK:      sub  [[R:x[0-9]+]], x29, #[[OFS:[0-9]+]]
+; CHECK:      irg  [[R]], [[R]]
+; CHECK:      addg x1, [[R]], #0, #1
+; CHECK:      sub  x0, x29, #[[OFS]]
+; CHECK:      bl   use2
+  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %a = alloca i128, i64 %size, align 16
+  %b = alloca i8, i64 16, align 16
+  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
+  call void @use2(i8* %b, i8* %b_t)
+  ret void
+}
+
+; Both dynamic alloca and realigned frame.
+; After initial realignment, generate the base pointer.
+; IRG uses the base pointer w/o offset.
+; Offsets for tagged and untagged pointers to the same alloca match.
+define void @dynamic_alloca_and_realign(i64 %size) {
+entryz:
+; CHECK-LABEL: dynamic_alloca_and_realign:
+; CHECK:      and  sp, x{{.*}}, #0xffffffffffffffc0
+; CHECK:      mov  x19, sp
+; CHECK:      irg  [[R:x[0-9]+]], x19
+; CHECK:      addg x1, [[R]], #[[OFS:[0-9]+]], #1
+; CHECK:      add  x0, x19, #[[OFS]]
+; CHECK:      bl   use2
+  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %a = alloca i128, i64 %size, align 64
+  %b = alloca i8, i64 16, align 16
+  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
+  call void @use2(i8* %b, i8* %b_t)
+  ret void
+}
+
+declare void @use(i8*)
+declare void @use2(i8*, i8*)
+
+declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
+declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)
diff --git a/llvm/test/CodeGen/AArch64/settag.ll b/llvm/test/CodeGen/AArch64/settag.ll
new file mode 100644
index 0000000000000..9ca188fbce325
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/settag.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define void @stg1(i8* %p) {
+entry:
+; CHECK-LABEL: stg1:
+; CHECK: stg x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 16)
+  ret void
+}
+
+define void @stg2(i8* %p) {
+entry:
+; CHECK-LABEL: stg2:
+; CHECK: st2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 32)
+  ret void
+}
+
+define void @stg3(i8* %p) {
+entry:
+; CHECK-LABEL: stg3:
+; CHECK: stg  x0, [x0, #32]
+; CHECK: st2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 48)
+  ret void
+}
+
+define void @stg4(i8* %p) {
+entry:
+; CHECK-LABEL: stg4:
+; CHECK: st2g x0, [x0, #32]
+; CHECK: st2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 64)
+  ret void
+}
+
+define void @stg5(i8* %p) {
+entry:
+; CHECK-LABEL: stg5:
+; CHECK: stg  x0, [x0, #64]
+; CHECK: st2g x0, [x0, #32]
+; CHECK: st2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 80)
+  ret void
+}
+
+define void @stg16(i8* %p) {
+entry:
+; CHECK-LABEL: stg16:
+; CHECK: mov  {{(w|x)}}[[R:[0-9]+]], #256
+; CHECK: st2g x0, [x0], #32
+; CHECK: sub  x[[R]], x[[R]], #32
+; CHECK: cbnz x[[R]],
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 256)
+  ret void
+}
+
+define void @stg17(i8* %p) {
+entry:
+; CHECK-LABEL: stg17:
+; CHECK: mov  {{(w|x)}}[[R:[0-9]+]], #256
+; CHECK: stg x0, [x0], #16
+; CHECK: st2g x0, [x0], #32
+; CHECK: sub  x[[R]], x[[R]], #32
+; CHECK: cbnz x[[R]],
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 272)
+  ret void
+}
+
+define void @stzg3(i8* %p) {
+entry:
+; CHECK-LABEL: stzg3:
+; CHECK: stzg  x0, [x0, #32]
+; CHECK: stz2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag.zero(i8* %p, i64 48)
+  ret void
+}
+
+define void @stzg17(i8* %p) {
+entry:
+; CHECK-LABEL: stzg17:
+; CHECK: mov  {{w|x}}[[R:[0-9]+]], #256
+; CHECK: stzg x0, [x0], #16
+; CHECK: stz2g x0, [x0], #32
+; CHECK: sub  x[[R]], x[[R]], #32
+; CHECK: cbnz x[[R]],
+; CHECK: ret
+  call void @llvm.aarch64.settag.zero(i8* %p, i64 272)
+  ret void
+}
+
+define void @stg_alloca1() {
+entry:
+; CHECK-LABEL: stg_alloca1:
+; CHECK: stg sp, [sp]
+; CHECK: ret
+  %a = alloca i8, i32 16, align 16
+  call void @llvm.aarch64.settag(i8* %a, i64 16)
+  ret void
+}
+
+define void @stg_alloca5() {
+entry:
+; CHECK-LABEL: stg_alloca5:
+; CHECK: stg  sp, [sp, #64]
+; CHECK: st2g sp, [sp, #32]
+; CHECK: st2g sp, [sp]
+; CHECK: ret
+  %a = alloca i8, i32 80, align 16
+  call void @llvm.aarch64.settag(i8* %a, i64 80)
+  ret void
+}
+
+define void @stg_alloca17() {
+entry:
+; CHECK-LABEL: stg_alloca17:
+; CHECK: mov [[P:x[0-9]+]], sp
+; CHECK: stg [[P]], {{\[}}[[P]]{{\]}}, #16
+; CHECK: mov  {{w|x}}[[R:[0-9]+]], #256
+; CHECK: st2g [[P]], {{\[}}[[P]]{{\]}}, #32
+; CHECK: sub  x[[R]], x[[R]], #32
+; CHECK: cbnz x[[R]],
+; CHECK: ret
+  %a = alloca i8, i32 272, align 16
+  call void @llvm.aarch64.settag(i8* %a, i64 272)
+  ret void
+}
+
+declare void @llvm.aarch64.settag(i8* %p, i64 %a)
+declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)
diff --git a/llvm/test/CodeGen/AArch64/stgp.ll b/llvm/test/CodeGen/AArch64/stgp.ll
new file mode 100644
index 0000000000000..b4af16fd9ff79
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stgp.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define void @stgp0(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp0:
+; CHECK: stgp x0, x1, [x2]
+; CHECK: ret
+  call void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp1004(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp1004:
+; CHECK: add [[R:x[0-9]+]], x2, #1004
+; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 1004
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp1008(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp1008:
+; CHECK: stgp x0, x1, [x2, #1008]
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 1008
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp1024(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp1024:
+; CHECK: add [[R:x[0-9]+]], x2, #1024
+; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 1024
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp_1024(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp_1024:
+; CHECK: stgp x0, x1, [x2, #-1024]
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 -1024
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp_1040(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp_1040:
+; CHECK: sub [[R:x[0-9]+]], x2, #1040
+; CHECK: stgp x0, x1, [x{{.*}}]
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 -1040
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp_alloca(i64 %a, i64 %b) {
+entry:
+; CHECK-LABEL: stgp_alloca:
+; CHECK: stgp x0, x1, [sp]
+; CHECK: stgp x1, x0, [sp, #16]
+; CHECK: ret
+  %x = alloca i8, i32 32, align 16
+  call void @llvm.aarch64.stgp(i8* %x, i64 %a, i64 %b)
+  %x1 = getelementptr i8, i8* %x, i32 16
+  call void @llvm.aarch64.stgp(i8* %x1, i64 %b, i64 %a)
+  ret void
+}
+
+declare void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
diff --git a/llvm/test/CodeGen/AArch64/tagp.ll b/llvm/test/CodeGen/AArch64/tagp.ll
new file mode 100644
index 0000000000000..0af6538981817
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/tagp.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define i8* @tagp2(i8* %p, i8* %tag) {
+entry:
+; CHECK-LABEL: tagp2:
+; CHECK: subp [[R:x[0-9]+]], x0, x1
+; CHECK: add  [[R]], [[R]], x1
+; CHECK: addg x0, [[R]], #0, #2
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
+  ret i8* %q
+}
+
+define i8* @irg_tagp_unrelated(i8* %p, i8* %q) {
+entry:
+; CHECK-LABEL: irg_tagp_unrelated:
+; CHECK: irg  [[R0:x[0-9]+]], x0{{$}}
+; CHECK: subp [[R:x[0-9]+]], [[R0]], x1
+; CHECK: add  [[R]], [[R0]], x1
+; CHECK: addg x0, [[R]], #0, #1
+; CHECK: ret
+  %p1 = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
+  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p1, i8* %q, i64 1)
+  ret i8* %q1
+}
+
+define i8* @tagp_alloca(i8* %tag) {
+entry:
+; CHECK-LABEL: tagp_alloca:
+; CHECK: mov  [[R0:x[0-9]+]], sp{{$}}
+; CHECK: subp [[R:x[0-9]+]], [[R0]], x0{{$}}
+; CHECK: add  [[R]], [[R0]], x0{{$}}
+; CHECK: addg x0, [[R]], #0, #3
+; CHECK: ret
+  %a = alloca i8, align 16
+  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 3)
+  ret i8* %q
+}
+
+declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
+declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)

From 851339fb29527b483c2ca627d340ef9f4b074732 Mon Sep 17 00:00:00 2001
From: Evgeniy Stepanov <eugeni.stepanov@gmail.com>
Date: Wed, 17 Jul 2019 19:24:12 +0000
Subject: [PATCH 379/451] Basic MTE stack tagging instrumentation.

Summary:
Use MTE intrinsics to tag stack variables in functions with
sanitize_memtag attribute.

Reviewers: pcc, vitalybuka, hctim, ostannard

Subscribers: srhines, mgorny, javed.absar, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64173

llvm-svn: 366361
---
 llvm/lib/Target/AArch64/AArch64.h             |   2 +
 .../Target/AArch64/AArch64StackTagging.cpp    | 345 ++++++++++++++++++
 .../Target/AArch64/AArch64TargetMachine.cpp   |   3 +
 llvm/lib/Target/AArch64/CMakeLists.txt        |   1 +
 llvm/test/CodeGen/AArch64/O0-pipeline.ll      |   1 +
 llvm/test/CodeGen/AArch64/O3-pipeline.ll      |   1 +
 .../test/CodeGen/AArch64/stack-tagging-dbg.ll |  37 ++
 llvm/test/CodeGen/AArch64/stack-tagging.ll    | 187 ++++++++++
 8 files changed, 577 insertions(+)
 create mode 100644 llvm/lib/Target/AArch64/AArch64StackTagging.cpp
 create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-dbg.ll
 create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging.ll

diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 2e63e261c489a..6965403a25ab9 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -56,6 +56,7 @@ InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &,
                                  AArch64Subtarget &, AArch64RegisterBankInfo &);
 FunctionPass *createAArch64PreLegalizeCombiner();
+FunctionPass *createAArch64StackTaggingPass();
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
 void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@@ -78,6 +79,7 @@ void initializeAArch64StorePairSuppressPass(PassRegistry&);
 void initializeFalkorHWPFFixPass(PassRegistry&);
 void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
 void initializeLDTLSCleanupPass(PassRegistry&);
+void initializeAArch64StackTaggingPass(PassRegistry&);
 } // end namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
new file mode 100644
index 0000000000000..f27a6311365af
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -0,0 +1,345 @@
+//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-tagging"
+
+static constexpr unsigned kTagGranuleSize = 16;
+
+namespace {
+class AArch64StackTagging : public FunctionPass {
+  struct AllocaInfo {
+    AllocaInst *AI;
+    SmallVector<IntrinsicInst *, 2> LifetimeStart;
+    SmallVector<IntrinsicInst *, 2> LifetimeEnd;
+    SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
+    int Tag; // -1 for non-tagged allocations
+  };
+
+public:
+  static char ID; // Pass ID, replacement for typeid
+
+  AArch64StackTagging() : FunctionPass(ID) {
+    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool isInterestingAlloca(const AllocaInst &AI);
+  void alignAndPadAlloca(AllocaInfo &Info);
+
+  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
+                 uint64_t Size);
+  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);
+
+  Instruction *
+  insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
+                          const DominatorTree *DT);
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }
+
+private:
+  Function *F;
+  Function *SetTagFunc;
+  const DataLayout *DL;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+};
+
+} // end anonymous namespace
+
+char AArch64StackTagging::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
+                      false, false)
+INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
+                    false, false)
+
+FunctionPass *llvm::createAArch64StackTaggingPass() {
+  return new AArch64StackTagging();
+}
+
+// Returns true if AI is an alloca this pass should tag.
+bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
+  // FIXME: support dynamic allocas
+  if (!AI.getAllocatedType()->isSized() || !AI.isStaticAlloca())
+    return false;
+  // alloca() may be called with 0 size, ignore it.
+  if (AI.getAllocationSizeInBits(*DL).getValue() == 0)
+    return false;
+  // inalloca allocas are not treated as static, and we don't want dynamic
+  // alloca instrumentation for them as well; swifterror allocas are register
+  // promoted by ISel.
+  return !AI.isUsedWithInAlloca() && !AI.isSwiftError();
+}
+
+void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
+                                    Value *Ptr, uint64_t Size) {
+  IRBuilder<> IRB(InsertBefore);
+  IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
+}
+
+void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
+                                      uint64_t Size) {
+  IRBuilder<> IRB(InsertBefore);
+  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
+                              ConstantInt::get(IRB.getInt64Ty(), Size)});
+}
+
+// Inserts the llvm.aarch64.irg.sp call ("basetag") at the start of the nearest
+// common dominator of the tagged allocas' blocks. DT is unused for one alloca.
+Instruction *AArch64StackTagging::insertBaseTaggedPointer(
+    const MapVector<AllocaInst *, AllocaInfo> &Allocas,
+    const DominatorTree *DT) {
+  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
+  BasicBlock *PrologueBB = nullptr;
+  for (const auto &Entry : Allocas) {
+    const AllocaInfo &Info = Entry.second;
+    if (Info.Tag < 0)
+      continue;
+    BasicBlock *AllocaBB = Info.AI->getParent();
+    PrologueBB = PrologueBB
+                     ? DT->findNearestCommonDominator(PrologueBB, AllocaBB)
+                     : AllocaBB;
+  }
+  assert(PrologueBB);
+
+  IRBuilder<> Builder(&PrologueBB->front());
+  Function *IrgSp =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
+  Instruction *Base =
+      Builder.CreateCall(IrgSp, {Constant::getNullValue(Builder.getInt64Ty())});
+  Base->setName("basetag");
+  return Base;
+}
+
+// Aligns Info.AI to the tag granule and pads its size to a granule multiple.
+void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
+  Info.AI->setAlignment(std::max(Info.AI->getAlignment(), kTagGranuleSize));
+
+  uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
+  uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
+  if (Size == AlignedSize)
+    return;
+
+  // Add padding to the alloca. cast<> asserts the array size is a ConstantInt.
+  Type *AllocatedType =
+      Info.AI->isArrayAllocation()
+          ? ArrayType::get(
+                Info.AI->getAllocatedType(),
+                cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
+          : Info.AI->getAllocatedType();
+  Type *PaddingType =
+      ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
+  Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
+  auto *NewAI = new AllocaInst(
+      TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
+  NewAI->takeName(Info.AI);
+  NewAI->setAlignment(Info.AI->getAlignment());
+  NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
+  NewAI->setSwiftError(Info.AI->isSwiftError());
+  NewAI->copyMetadata(*Info.AI);
+  // Route existing users through a bitcast back to the original pointer type.
+  auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
+  Info.AI->replaceAllUsesWith(NewPtr);
+  Info.AI->eraseFromParent();
+  Info.AI = NewAI;
+}
+
+// Tags allocas of sanitize_memtag functions. FIXME: check for MTE extension
+bool AArch64StackTagging::runOnFunction(Function &Fn) {
+  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
+    return false;
+
+  F = &Fn;
+  DL = &Fn.getParent()->getDataLayout();
+
+  MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
+  SmallVector<Instruction *, 8> RetVec; // ret/resume/cleanupret instructions
+  DenseMap<Value *, AllocaInst *> AllocaForValue;
+  SmallVector<Instruction *, 4> UnrecognizedLifetimes;
+
+  for (auto &BB : *F) {
+    for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
+      Instruction *I = &*IT;
+      if (auto *AI = dyn_cast<AllocaInst>(I)) {
+        Allocas[AI].AI = AI;
+        continue;
+      }
+
+      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
+        if (auto *AI =
+                dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation())) {
+          Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
+        }
+        continue;
+      }
+
+      auto *II = dyn_cast<IntrinsicInst>(I);
+      if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+                 II->getIntrinsicID() == Intrinsic::lifetime_end)) {
+        AllocaInst *AI =
+            llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue);
+        if (!AI) {
+          UnrecognizedLifetimes.push_back(I);
+          continue;
+        }
+        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+          Allocas[AI].LifetimeStart.push_back(II);
+        else
+          Allocas[AI].LifetimeEnd.push_back(II);
+      }
+
+      if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
+        RetVec.push_back(I);
+    }
+  }
+
+  if (Allocas.empty())
+    return false;
+
+  int NextTag = 0; // tags are handed out round-robin, modulo 16
+  int NumInterestingAllocas = 0;
+  for (auto &I : Allocas) {
+    AllocaInfo &Info = I.second;
+    assert(Info.AI);
+
+    if (!isInterestingAlloca(*Info.AI)) {
+      Info.Tag = -1; // negative tag: skipped by the tagging loop below
+      continue;
+    }
+
+    alignAndPadAlloca(Info);
+    NumInterestingAllocas++;
+    Info.Tag = NextTag;
+    NextTag = (NextTag + 1) % 16;
+  }
+
+  if (NumInterestingAllocas == 0)
+    return true;
+
+  SetTagFunc =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
+
+  // Compute DT only if the function has the attribute, there are more than 1
+  // interesting allocas, and it is not available for free.
+  Instruction *Base;
+  if (NumInterestingAllocas > 1) {
+    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+    if (DTWP) {
+      Base = insertBaseTaggedPointer(Allocas, &DTWP->getDomTree());
+    } else {
+      DominatorTree DT(*F);
+      Base = insertBaseTaggedPointer(Allocas, &DT);
+    }
+  } else {
+    Base = insertBaseTaggedPointer(Allocas, nullptr);
+  }
+
+  for (auto &I : Allocas) {
+    const AllocaInfo &Info = I.second;
+    AllocaInst *AI = Info.AI;
+    if (Info.Tag < 0)
+      continue;
+
+    // Replace alloca with tagp(alloca).
+    IRBuilder<> IRB(Info.AI->getNextNode());
+    Function *TagP = Intrinsic::getDeclaration(
+        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
+    Instruction *TagPCall =
+        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
+                              ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
+    if (Info.AI->hasName())
+      TagPCall->setName(Info.AI->getName() + ".tag");
+    Info.AI->replaceAllUsesWith(TagPCall);
+    TagPCall->setOperand(0, Info.AI); // RAUW above must not rewrite this use
+
+    if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
+        Info.LifetimeEnd.size() == 1) {
+      IntrinsicInst *Start = Info.LifetimeStart[0];
+      uint64_t Size =
+          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
+      Size = alignTo(Size, kTagGranuleSize);
+      tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
+      untagAlloca(AI, Info.LifetimeEnd[0], Size);
+    } else {
+      uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
+      tagAlloca(AI, TagPCall->getNextNode(),
+                IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy()), Size);
+      for (auto &RI : RetVec) {
+        untagAlloca(AI, RI, Size);
+      }
+      // We may have inserted tag/untag outside of any lifetime interval.
+      // Remove all lifetime intrinsics for this alloca.
+      for (auto &II : Info.LifetimeStart)
+        II->eraseFromParent();
+      for (auto &II : Info.LifetimeEnd)
+        II->eraseFromParent();
+    }
+
+    // Fixup debug intrinsics to point to the new alloca.
+    for (auto DVI : Info.DbgVariableIntrinsics)
+      DVI->setArgOperand(
+          0,
+          MetadataAsValue::get(F->getContext(), LocalAsMetadata::get(Info.AI)));
+  }
+
+  // If we have instrumented at least one alloca, all unrecognized lifetime
+  // intrinsics have to go.
+  for (auto &I : UnrecognizedLifetimes)
+    I->eraseFromParent();
+
+  return true;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7ae055ad50363..8654614804997 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -179,6 +179,7 @@ extern "C" void LLVMInitializeAArch64Target() {
   initializeFalkorMarkStridedAccessesLegacyPass(*PR);
   initializeLDTLSCleanupPass(*PR);
   initializeAArch64SpeculationHardeningPass(*PR);
+  initializeAArch64StackTaggingPass(*PR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -446,6 +447,8 @@ void AArch64PassConfig::addIRPasses() {
     // invariant.
     addPass(createLICMPass());
   }
+
+  addPass(createAArch64StackTaggingPass());
 }
 
 // Pass Pipeline Configuration
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 0fdf8e3d53420..3154ed03bd46d 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -55,6 +55,7 @@ add_llvm_target(AArch64CodeGen
   AArch64RegisterInfo.cpp
   AArch64SelectionDAGInfo.cpp
   AArch64SpeculationHardening.cpp
+  AArch64StackTagging.cpp
   AArch64StorePairSuppress.cpp
   AArch64Subtarget.cpp
   AArch64TargetMachine.cpp
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
index b25d4e94e3e1e..e8f831f6151e2 100644
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -25,6 +25,7 @@
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT:       Expand reduction intrinsics
+; CHECK-NEXT:       AArch64 Stack Tagging
 ; CHECK-NEXT:     Rewrite Symbols
 ; CHECK-NEXT:     FunctionPass Manager
 ; CHECK-NEXT:       Dominator Tree Construction
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 03815481ef5cd..a331ac87f8f48 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -55,6 +55,7 @@
 ; CHECK-NEXT:       Interleaved Load Combine Pass
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Interleaved Access Pass
+; CHECK-NEXT:       AArch64 Stack Tagging
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       CodeGen Prepare
 ; CHECK-NEXT:     Rewrite Symbols
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-dbg.ll b/llvm/test/CodeGen/AArch64/stack-tagging-dbg.ll
new file mode 100644
index 0000000000000..9ca4ff59b24f2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-tagging-dbg.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -stack-tagging -S -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android"
+
+declare void @use32(i32*)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone speculatable
+
+; Debug intrinsics use the new alloca directly, not through a GEP or a tagp.
+define void @DbgIntrinsics() sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata i32* %x, metadata !6, metadata !DIExpression()), !dbg !10
+  store i32 42, i32* %x, align 4
+  call void @use32(i32* %x)
+  ret void
+}
+
+; CHECK-LABEL: define void @DbgIntrinsics(
+; CHECK:  [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
+; CHECK:  call void @llvm.dbg.declare(metadata { i32, [12 x i8] }* [[X]],
+
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "stack-tagging.cc", directory: "/tmp")
+!2 = !{}
+!3 = distinct !DISubprogram(name: "DbgIntrinsics", linkageName: "DbgIntrinsics", scope: !1, file: !1, line: 3, type: !4, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!4 = !DISubroutineType(types: !5)
+!5 = !{null}
+!6 = !DILocalVariable(name: "x", scope: !3, file: !1, line: 4, type: !7)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !DILocation(line: 1, column: 2, scope: !3)
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging.ll b/llvm/test/CodeGen/AArch64/stack-tagging.ll
new file mode 100644
index 0000000000000..244a0a1edbb25
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-tagging.ll
@@ -0,0 +1,187 @@
+; RUN: opt < %s -stack-tagging -S -o - | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android"
+
+declare void @use8(i8*)
+declare void @use32(i32*)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define void @OneVar() sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  call void @use32(i32* %x)
+  ret void
+}
+
+; CHECK-LABEL: define void @OneVar(
+; CHECK:  [[BASE:%.*]] = call i8* @llvm.aarch64.irg.sp(i64 0)
+; CHECK:  [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
+; CHECK:  [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp.{{.*}}({ i32, [12 x i8] }* [[X]], i8* [[BASE]], i64 0)
+; CHECK:  [[TX8:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i8*
+; CHECK:  call void @llvm.aarch64.settag(i8* [[TX8]], i64 16)
+; CHECK:  [[GEP32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
+; CHECK:  call void @use32(i32* [[GEP32]])
+; CHECK:  [[GEP8:%.*]] = bitcast { i32, [12 x i8] }* [[X]] to i8*
+; CHECK:  call void @llvm.aarch64.settag(i8* [[GEP8]], i64 16)
+; CHECK:  ret void
+
+
+define void @ManyVars() sanitize_memtag {
+entry:
+  %x1 = alloca i32, align 4
+  %x2 = alloca i8, align 4
+  %x3 = alloca i32, i32 11, align 4
+  call void @use32(i32* %x1)
+  call void @use8(i8* %x2)
+  call void @use32(i32* %x3)
+  ret void
+}
+
+; CHECK-LABEL: define void @ManyVars(
+; CHECK:  alloca { i32, [12 x i8] }, align 16
+; CHECK:  call { i32, [12 x i8] }* @llvm.aarch64.tagp.{{.*}}({ i32, [12 x i8] }* {{.*}}, i64 0)
+; CHECK:  call void @llvm.aarch64.settag(i8* {{.*}}, i64 16)
+; CHECK:  alloca { i8, [15 x i8] }, align 16
+; CHECK:  call { i8, [15 x i8] }* @llvm.aarch64.tagp.{{.*}}({ i8, [15 x i8] }* {{.*}}, i64 1)
+; CHECK:  call void @llvm.aarch64.settag(i8* {{.*}}, i64 16)
+; CHECK:  alloca { [11 x i32], [4 x i8] }, align 16
+; CHECK:  call { [11 x i32], [4 x i8] }* @llvm.aarch64.tagp.{{.*}}({ [11 x i32], [4 x i8] }* {{.*}}, i64 2)
+; CHECK:  call void @llvm.aarch64.settag(i8* {{.*}}, i64 48)
+
+; CHECK:  call void @use32(
+; CHECK:  call void @use8(
+; CHECK:  call void @use32(
+
+; CHECK:  call void @llvm.aarch64.settag(i8* {{.*}}, i64 16)
+; CHECK:  call void @llvm.aarch64.settag(i8* {{.*}}, i64 16)
+; CHECK:  call void @llvm.aarch64.settag(i8* {{.*}}, i64 48)
+; CHECK-NEXT:  ret void
+
+
+define void @Scope(i32 %b) sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @use8(i8* %0) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: define void @Scope(
+; CHECK:  br i1
+; CHECK:  call void @llvm.lifetime.start.p0i8(
+; CHECK:  call void @llvm.aarch64.settag(
+; CHECK:  call void @use8(
+; CHECK:  call void @llvm.aarch64.settag(
+; CHECK:  call void @llvm.lifetime.end.p0i8(
+; CHECK:  br label
+; CHECK:  ret void
+
+
+; Spooked by the multiple lifetime ranges, StackTagging removes all of them and sets tags on entry and exit.
+define void @BadScope(i32 %b) sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  %tobool = icmp eq i32 %b, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+  %0 = bitcast i32* %x to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @use8(i8* %0) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
+  call void @use8(i8* %0) #3
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: define void @BadScope(
+; CHECK:       call void @llvm.aarch64.settag(i8* {{.*}}, i64 16)
+; CHECK:       br i1
+; CHECK:       call void @use8(i8*
+; CHECK-NEXT:  call void @use8(i8*
+; CHECK:       br label
+; CHECK:       call void @llvm.aarch64.settag(i8* {{.*}}, i64 16)
+; CHECK-NEXT:  ret void
+
+define void @DynamicAllocas(i32 %cnt) sanitize_memtag {
+entry:
+  %x = alloca i32, i32 %cnt, align 4
+  br label %l
+l:
+  %y = alloca i32, align 4
+  call void @use32(i32* %x)
+  call void @use32(i32* %y)
+  ret void
+}
+
+; CHECK-LABEL: define void @DynamicAllocas(
+; CHECK-NOT: @llvm.aarch64.irg.sp
+; CHECK:     %x = alloca i32, i32 %cnt, align 4
+; CHECK-NOT: @llvm.aarch64.irg.sp
+; CHECK:     alloca i32, align 4
+; CHECK-NOT: @llvm.aarch64.irg.sp
+; CHECK:     ret void
+
+; If we can't trace one of the lifetime markers to a single alloca, fall back
+; to poisoning all allocas at the beginning of the function.
+; Each alloca must be poisoned only once.
+define void @UnrecognizedLifetime(i8 %v) sanitize_memtag {
+entry:
+  %x = alloca i32, align 4
+  %y = alloca i32, align 4
+  %z = alloca i32, align 4
+  %cx = bitcast i32* %x to i8*
+  %cy = bitcast i32* %y to i8*
+  %cz = bitcast i32* %z to i8*
+  %tobool = icmp eq i8 %v, 0
+  %xy = select i1 %tobool, i32* %x, i32* %y
+  %cxcy = select i1 %tobool, i8* %cx, i8* %cy
+  br label %another_bb
+
+another_bb:
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %cz)
+  store i32 7, i32* %z
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %cz)
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %cz)
+  store i32 7, i32* %z
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %cz)
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %cxcy)
+  store i32 8, i32* %xy
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %cxcy)
+  ret void
+}
+
+; CHECK-LABEL: define void @UnrecognizedLifetime(
+; CHECK: call i8* @llvm.aarch64.irg.sp(i64 0)
+; CHECK: alloca { i32, [12 x i8] }, align 16
+; CHECK: call { i32, [12 x i8] }* @llvm.aarch64.tagp
+; CHECK: call void @llvm.aarch64.settag(
+; CHECK: alloca { i32, [12 x i8] }, align 16
+; CHECK: call { i32, [12 x i8] }* @llvm.aarch64.tagp
+; CHECK: call void @llvm.aarch64.settag(
+; CHECK: alloca { i32, [12 x i8] }, align 16
+; CHECK: call { i32, [12 x i8] }* @llvm.aarch64.tagp
+; CHECK: call void @llvm.aarch64.settag(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: call void @llvm.aarch64.settag(
+; CHECK: call void @llvm.aarch64.settag(
+; CHECK: call void @llvm.aarch64.settag(
+; CHECK: ret void

From ca12cb9482a23cd9596a1e55c94149994bf9909a Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 17 Jul 2019 19:24:15 +0000
Subject: [PATCH 380/451] [CMake] Use LLVM_DIR and Clang_DIR for standalone
 builds.

When doing a standalone build, without setting LLDB_PATH_TO_LLVM_BUILD
or LLDB_PATH_TO_CLANG_BUILD, you get the following error.

```
CMake Error at cmake/modules/LLDBStandalone.cmake:23 (find_package):
  Could not find a package configuration file provided by "LLVM" with any of
  the following names:

    LLVMConfig.cmake
    llvm-config.cmake

  Add the installation prefix of "LLVM" to CMAKE_PREFIX_PATH or set
  "LLVM_DIR" to a directory containing one of the above files.  If "LLVM"
  provides a separate development package or SDK, be sure it has been
  installed.
```

This suggests setting LLVM_DIR to LLVM's install directory. However,
LLDBStandalone.cmake takes LLDB_PATH_TO_LLVM_BUILD as its hint. As
someone who isn't familiar with the standalone process, this is rather
confusing. This patch removes LLDB_PATH_TO_LLVM_BUILD and
LLDB_PATH_TO_CLANG_BUILD and instead uses LLVM_DIR and Clang_DIR
respectively.

Differential revision: https://reviews.llvm.org/D64823

llvm-svn: 366362
---
 lldb/cmake/modules/LLDBStandalone.cmake | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 2d7ee2574e3f7..8d89f7c2943a0 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -2,16 +2,8 @@ project(lldb)
 
 option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF)
 
-set(LLDB_PATH_TO_LLVM_BUILD "" CACHE PATH "Path to LLVM build tree")
-set(LLDB_PATH_TO_CLANG_BUILD "${LLDB_PATH_TO_LLVM_BUILD}" CACHE PATH "Path to Clang build tree")
-
-file(TO_CMAKE_PATH "${LLDB_PATH_TO_LLVM_BUILD}" LLDB_PATH_TO_LLVM_BUILD)
-file(TO_CMAKE_PATH "${LLDB_PATH_TO_CLANG_BUILD}" LLDB_PATH_TO_CLANG_BUILD)
-
-find_package(LLVM REQUIRED CONFIG
-  HINTS "${LLDB_PATH_TO_LLVM_BUILD}" NO_CMAKE_FIND_ROOT_PATH)
-find_package(Clang REQUIRED CONFIG
-  HINTS "${LLDB_PATH_TO_CLANG_BUILD}" NO_CMAKE_FIND_ROOT_PATH)
+find_package(LLVM REQUIRED CONFIG HINTS "${LLVM_DIR}" NO_CMAKE_FIND_ROOT_PATH)
+find_package(Clang REQUIRED CONFIG HINTS "${Clang_DIR}" NO_CMAKE_FIND_ROOT_PATH)
 
 # We set LLVM_CMAKE_PATH so that GetSVN.cmake is found correctly when building SVNVersion.inc
 set(LLVM_CMAKE_PATH ${LLVM_CMAKE_DIR} CACHE PATH "Path to LLVM CMake modules")
@@ -40,7 +32,7 @@ find_program(lit_full_path ${lit_file_name} ${config_dirs} NO_DEFAULT_PATH)
 set(LLVM_DEFAULT_EXTERNAL_LIT ${lit_full_path} CACHE PATH "Path to llvm-lit")
 
 if(CMAKE_CROSSCOMPILING)
-  set(LLVM_NATIVE_BUILD "${LLDB_PATH_TO_LLVM_BUILD}/NATIVE")
+  set(LLVM_NATIVE_BUILD "${LLVM_BINARY_DIR}/NATIVE")
   if (NOT EXISTS "${LLVM_NATIVE_BUILD}")
     message(FATAL_ERROR
       "Attempting to cross-compile LLDB standalone but no native LLVM build

From e37750b93440de1d4b42cb731b0f20c981e6f74d Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 17 Jul 2019 19:36:20 +0000
Subject: [PATCH 381/451] [CMake] Remove duplicated logic to find Python when
 doing a standalone build

I'm pretty sure there's no need to have this logic living in
LLDBStandalone. It doesn't appear anything in LLVM depends on this, and
we always go through LLDBConfig.cmake which has the canonical way to
find the Python libs and interpreter for LLDB.

Differential revision: https://reviews.llvm.org/D64821

llvm-svn: 366363
---
 lldb/cmake/modules/LLDBStandalone.cmake | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 8d89f7c2943a0..b726797f87657 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -77,18 +77,6 @@ include(HandleLLVMOptions)
 include(CheckAtomic)
 include(LLVMDistributionSupport)
 
-set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 2.7)
-if (PYTHON_EXECUTABLE STREQUAL "")
-  include(FindPythonInterp)
-  if( NOT PYTHONINTERP_FOUND )
-    message(FATAL_ERROR
-            "Unable to find Python interpreter, required for builds and testing.
-              Please install Python or specify the PYTHON_EXECUTABLE CMake variable.")
-  endif()
-else()
-  message(STATUS "Found PythonInterp: ${PYTHON_EXECUTABLE}")
-endif()
-
 set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}")
 set(LLVM_INCLUDE_TESTS ON CACHE INTERNAL "")
 

From c2cd84bcfbd68e1368874a373744e4c978f2a762 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 17 Jul 2019 19:49:01 +0000
Subject: [PATCH 382/451] [docs] Adjust variable formatting table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While the in-place hints on valid formats are up to date (e.g. when
choosing an invalid format expr -f nonExisting -- 42), the corresponding
online docs table is not. The formats "address", "hex float",
"instruction" and "void" are missing, and "decimal" refers to an
outdated abbreviation 'i' instead of 'd'.

Patch by: Lukas Böger

Differential revision: https://reviews.llvm.org/D63813

llvm-svn: 366364
---
 lldb/docs/use/variable.rst | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lldb/docs/use/variable.rst b/lldb/docs/use/variable.rst
index 0e65f42be45de..f3bde2de4144d 100644
--- a/lldb/docs/use/variable.rst
+++ b/lldb/docs/use/variable.rst
@@ -197,7 +197,7 @@ pick:
 +-----------------------------------------------+------------------+--------------------------------------------------------------------------+
 | ``c-string``                                  | s                | show this as a 0-terminated C string                                     |
 +-----------------------------------------------+------------------+--------------------------------------------------------------------------+
-| ``decimal``                                   | i                | show this as a signed integer number (this does not perform a cast, it   |
+| ``decimal``                                   | d                | show this as a signed integer number (this does not perform a cast, it   |
 |                                               |                  | simply shows the bytes as  an integer with sign)                         |
 +-----------------------------------------------+------------------+--------------------------------------------------------------------------+
 | ``enumeration``                               | E                | show this as an enumeration, printing the                                |
@@ -239,6 +239,15 @@ pick:
 +-----------------------------------------------+------------------+--------------------------------------------------------------------------+
 | ``character array``                           | a                | show this as a character array                                           |
 +-----------------------------------------------+------------------+--------------------------------------------------------------------------+
+| ``address``                                   | A                | show this as an address target (symbol/file/line + offset), possibly     |
+|                                               |                  | also the string this address is pointing to                              |
++-----------------------------------------------+------------------+--------------------------------------------------------------------------+
+| ``hex float``                                 |                  | show this as hexadecimal floating point                                  |
++-----------------------------------------------+------------------+--------------------------------------------------------------------------+
+| ``instruction``                               | i                | show this as a disassembled opcode                                       |
++-----------------------------------------------+------------------+--------------------------------------------------------------------------+
+| ``void``                                      | v                | don't show anything                                                      |
++-----------------------------------------------+------------------+--------------------------------------------------------------------------+
 
 Type Summary
 ------------

From a0858e2f20c84df1be9d0add9b726996bbe395a4 Mon Sep 17 00:00:00 2001
From: Shafik Yaghmour <syaghmour@apple.com>
Date: Wed, 17 Jul 2019 20:16:13 +0000
Subject: [PATCH 383/451] Fix CreateFunctionTemplateSpecialization to prevent
 dangling pointer to stack memory

In ClangASTContext::CreateFunctionTemplateSpecializationInfo a TemplateArgumentList is allocated on the stack but is treated as if it is persistent in subsequent calls. When we exit the function func_decl will still point to the stack allocated memory. We will use TemplateArgumentList::CreateCopy instead which will allocate memory out of the DeclContext.

Differential Revision: https://reviews.llvm.org/D64777

llvm-svn: 366365
---
 .../Makefile                                    |  5 +++++
 ...estFunctionTemplateSpecializationTempArgs.py | 17 +++++++++++++++++
 .../main.cpp                                    | 17 +++++++++++++++++
 lldb/source/Symbol/ClangASTContext.cpp          |  7 ++++---
 4 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/Makefile
 create mode 100644 lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/TestFunctionTemplateSpecializationTempArgs.py
 create mode 100644 lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/main.cpp

diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/Makefile b/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/Makefile
new file mode 100644
index 0000000000000..8a7102e347af2
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/Makefile
@@ -0,0 +1,5 @@
+LEVEL = ../../make
+
+CXX_SOURCES := main.cpp
+
+include $(LEVEL)/Makefile.rules
diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/TestFunctionTemplateSpecializationTempArgs.py b/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/TestFunctionTemplateSpecializationTempArgs.py
new file mode 100644
index 0000000000000..bd5bc0ec72a97
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/TestFunctionTemplateSpecializationTempArgs.py
@@ -0,0 +1,17 @@
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+class TestFunctionTemplateSpecializationTempArgs(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    def test_function_template_specialization_temp_args(self):
+        self.build()
+
+        (self.target, self.process, _, bkpt) = lldbutil.run_to_source_breakpoint(self, '// break here',
+                lldb.SBFileSpec("main.cpp", False))
+
+        self.expect("expr p0",
+                substrs=['(VType) $0 = {}'])
diff --git a/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/main.cpp b/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/main.cpp
new file mode 100644
index 0000000000000..6d01288259afe
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/expression_command/function_template_specialization_temp_args/main.cpp
@@ -0,0 +1,17 @@
+template <typename T> struct M {};
+
+template <typename T> void f(T &t);
+
+template <> void f<int>(int &t) {
+  typedef M<int> VType;
+
+  VType p0; // break here
+}
+
+int main() {
+  int x;
+
+  f(x);
+
+  return 0;
+}
diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp
index f85c5d2b9e05d..205523355ce0b 100644
--- a/lldb/source/Symbol/ClangASTContext.cpp
+++ b/lldb/source/Symbol/ClangASTContext.cpp
@@ -1615,10 +1615,11 @@ clang::FunctionTemplateDecl *ClangASTContext::CreateFunctionTemplateDecl(
 void ClangASTContext::CreateFunctionTemplateSpecializationInfo(
     FunctionDecl *func_decl, clang::FunctionTemplateDecl *func_tmpl_decl,
     const TemplateParameterInfos &infos) {
-  TemplateArgumentList template_args(TemplateArgumentList::OnStack, infos.args);
+  TemplateArgumentList *template_args_ptr =
+      TemplateArgumentList::CreateCopy(func_decl->getASTContext(), infos.args);
 
-  func_decl->setFunctionTemplateSpecialization(func_tmpl_decl, &template_args,
-                                               nullptr);
+  func_decl->setFunctionTemplateSpecialization(func_tmpl_decl,
+                                               template_args_ptr, nullptr);
 }
 
 ClassTemplateDecl *ClangASTContext::CreateClassTemplateDecl(

From 914a59cad825f322ef10273327e708fe5eb78283 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 17 Jul 2019 20:22:38 +0000
Subject: [PATCH 384/451] GlobalISel: Handle more cases for widenScalar of
 G_MERGE_VALUES

Use an anyext to the requested type for the leftover operand to
produce a slightly wider type, and then truncate the final merge.

I have another implementation almost ready which handles arbitrary
widens, but I think it produces worse code in this example (which I
think is 90% due to not folding redundant copies or folding out
implicit_def users), so I wanted to add this as a baseline first.

llvm-svn: 366366
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 27 ++++++--
 .../GlobalISel/legalize-merge-values.mir      | 61 +++++++++++++++++++
 .../GlobalISel/LegalizerHelperTest.cpp        | 34 +++++++++++
 3 files changed, 118 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 958e9b59cf443..caa49cf2cf771 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -875,10 +875,14 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
 
   // Try to turn this into a merge of merges if we can use the requested type as
   // the source.
-
-  // TODO: Pad with undef if DstTy > WideTy
-  if (NumMerge > 1 && WideTy.getSizeInBits() % SrcTy.getSizeInBits() == 0) {
+  if (NumMerge > 1) {
     int PartsPerMerge = WideTy.getSizeInBits() / SrcTy.getSizeInBits();
+    if (WideTy.getSizeInBits() % SrcTy.getSizeInBits() != 0)
+      return UnableToLegalize;
+
+    int RemainderBits = DstTy.getSizeInBits() % WideTy.getSizeInBits();
+    int RemainderParts = RemainderBits / SrcTy.getSizeInBits();
+
     SmallVector<Register, 4> Parts;
     SmallVector<Register, 4> SubMerges;
 
@@ -891,7 +895,22 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
       Parts.clear();
     }
 
-    MIRBuilder.buildMerge(DstReg, SubMerges);
+    if (RemainderParts == 0) {
+      MIRBuilder.buildMerge(DstReg, SubMerges);
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
+    assert(RemainderParts == 1);
+
+    auto AnyExt = MIRBuilder.buildAnyExt(
+      WideTy, MI.getOperand(MI.getNumOperands() - 1).getReg());
+    SubMerges.push_back(AnyExt.getReg(0));
+
+    LLT WiderDstTy = LLT::scalar(SubMerges.size() * WideTy.getSizeInBits());
+    auto Merge = MIRBuilder.buildMerge(WiderDstTy, SubMerges);
+    MIRBuilder.buildTrunc(DstReg, Merge);
+
     MI.eraseFromParent();
     return Legalized;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
index 7438180111db7..20b65021b3103 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
@@ -535,3 +535,64 @@ body: |
     %8:_(s96) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5
     $vgpr0_vgpr1_vgpr2 = COPY %8
 ...
+
+---
+name: test_merge_s56_s8_s8_s8_s8_s8_s8_s8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_merge_s56_s8_s8_s8_s8_s8_s8_s8
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C7]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C9]]
+    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C10]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]]
+    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C10]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]]
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[TRUNC]]
+    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C12]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C10]]
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32)
+    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC6]]
+    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[C6]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[TRUNC7]](s16)
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64)
+    ; CHECK: S_NOP 0, implicit [[TRUNC8]](s56)
+    %0:_(s8) = G_CONSTANT i8 0
+    %1:_(s8) = G_CONSTANT i8 1
+    %2:_(s8) = G_CONSTANT i8 2
+    %3:_(s8) = G_CONSTANT i8 3
+    %4:_(s8) = G_CONSTANT i8 4
+    %5:_(s8) = G_CONSTANT i8 5
+    %6:_(s8) = G_CONSTANT i8 6
+    %7:_(s56) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6
+    S_NOP 0, implicit %7
+...
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 2ba95ab0826ef..608c0ddf08475 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -9,6 +9,10 @@
 
 #include "GISelMITest.h"
 
+using namespace LegalizeActions;
+using namespace LegalizeMutations;
+using namespace LegalityPredicates;
+
 namespace {
 
 class DummyGISelObserver : public GISelChangeObserver {
@@ -900,4 +904,34 @@ TEST_F(GISelMITest, WidenScalarBuildVector) {
   EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
 }
 
+TEST_F(GISelMITest, LowerMergeValues) {
+  if (!TM)
+    return;
+
+  const LLT S24 = LLT::scalar(24);
+  const LLT S9 = LLT::scalar(9);
+  const LLT S3 = LLT::scalar(3);
+
+  DefineLegalizerInfo(A, {
+    getActionDefinitionsBuilder(G_UNMERGE_VALUES)
+      .widenScalarIf(typeIs(1, LLT::scalar(3)), changeTo(1, LLT::scalar(9)));
+  });
+
+  AInfo Info(MF->getSubtarget());
+  DummyGISelObserver Observer;
+  LegalizerHelper Helper(*MF, Info, Observer, B);
+  B.setInsertPt(*EntryMBB, EntryMBB->end());
+
+  // 24 = 3 3 3   3 3 3   3 3
+  //     => 9
+  //
+  // This can do 2 merges for the first parts, but has 2 leftover operands.
+  SmallVector<Register, 7> MergeOps;
+  for (int I = 0; I != 8; ++I)
+    MergeOps.push_back(B.buildConstant(S3, I).getReg(0));
+
+  auto Merge = B.buildMerge(S24, MergeOps);
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize,
+            Helper.lower(*Merge, 1, S9));
+}
 } // namespace

From 0966dd0d69cf66bcba55a4b5d28b1059b5c9b6a6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 17 Jul 2019 20:22:44 +0000
Subject: [PATCH 385/451] GlobalISel: Handle widenScalar of arbitrary
 G_MERGE_VALUES sources

Extract the sources to the GCD of the original size and target size,
padding with implicit_def as necessary.

Also fix the case where the requested source type is wider than the
original result type. This was ignoring the type, and just using the
destination. Do the operation in the requested type and truncate back.

llvm-svn: 366367
---
 llvm/include/llvm/Support/MathExtras.h        |  11 +-
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 132 ++++--
 .../CodeGen/GlobalISel/MachineIRBuilder.cpp   |   3 +
 .../GlobalISel/legalize-merge-values.mir      | 447 +++++++++++++-----
 .../GlobalISel/LegalizerHelperTest.cpp        |  83 +++-
 5 files changed, 490 insertions(+), 186 deletions(-)

diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index 85d5a5ae4b903..249139e824b55 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -559,15 +559,20 @@ inline unsigned Log2_64_Ceil(uint64_t Value) {
 }
 
 /// Return the greatest common divisor of the values using Euclid's algorithm.
-inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
+template <typename T>
+inline T greatestCommonDivisor(T A, T B) {
   while (B) {
-    uint64_t T = B;
+    T Tmp = B;
     B = A % B;
-    A = T;
+    A = Tmp;
   }
   return A;
 }
 
+inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
+  return greatestCommonDivisor<uint64_t>(A, B);
+}
+
 /// This function takes a 64-bit integer and returns the bit equivalent double.
 inline double BitsToDouble(uint64_t Bits) {
   double D;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index caa49cf2cf771..f5cf7fc9bd9ba 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -871,71 +871,107 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
 
   Register Src1 = MI.getOperand(1).getReg();
   LLT SrcTy = MRI.getType(Src1);
-  int NumMerge = DstTy.getSizeInBits() / WideTy.getSizeInBits();
+  const int DstSize = DstTy.getSizeInBits();
+  const int SrcSize = SrcTy.getSizeInBits();
+  const int WideSize = WideTy.getSizeInBits();
+  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
 
-  // Try to turn this into a merge of merges if we can use the requested type as
-  // the source.
-  if (NumMerge > 1) {
-    int PartsPerMerge = WideTy.getSizeInBits() / SrcTy.getSizeInBits();
-    if (WideTy.getSizeInBits() % SrcTy.getSizeInBits() != 0)
-      return UnableToLegalize;
-
-    int RemainderBits = DstTy.getSizeInBits() % WideTy.getSizeInBits();
-    int RemainderParts = RemainderBits / SrcTy.getSizeInBits();
+  unsigned NumOps = MI.getNumOperands();
+  unsigned NumSrc = MI.getNumOperands() - 1;
+  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
 
-    SmallVector<Register, 4> Parts;
-    SmallVector<Register, 4> SubMerges;
+  if (WideSize >= DstSize) {
+    // Directly pack the bits in the target type.
+    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
 
-    for (int I = 0; I != NumMerge; ++I) {
-      for (int J = 0; J != PartsPerMerge; ++J)
-        Parts.push_back(MI.getOperand(I * PartsPerMerge + J + 1).getReg());
+    for (unsigned I = 2; I != NumOps; ++I) {
+      const unsigned Offset = (I - 1) * PartSize;
 
-      auto SubMerge = MIRBuilder.buildMerge(WideTy, Parts);
-      SubMerges.push_back(SubMerge.getReg(0));
-      Parts.clear();
-    }
+      Register SrcReg = MI.getOperand(I).getReg();
+      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
 
-    if (RemainderParts == 0) {
-      MIRBuilder.buildMerge(DstReg, SubMerges);
-      MI.eraseFromParent();
-      return Legalized;
-    }
+      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
 
-    assert(RemainderParts == 1);
+      Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
+        MRI.createGenericVirtualRegister(WideTy);
 
-    auto AnyExt = MIRBuilder.buildAnyExt(
-      WideTy, MI.getOperand(MI.getNumOperands() - 1).getReg());
-    SubMerges.push_back(AnyExt.getReg(0));
+      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
+      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
+      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+      ResultReg = NextResult;
+    }
 
-    LLT WiderDstTy = LLT::scalar(SubMerges.size() * WideTy.getSizeInBits());
-    auto Merge = MIRBuilder.buildMerge(WiderDstTy, SubMerges);
-    MIRBuilder.buildTrunc(DstReg, Merge);
+    if (WideSize > DstSize)
+      MIRBuilder.buildTrunc(DstReg, ResultReg);
 
     MI.eraseFromParent();
     return Legalized;
   }
 
-  unsigned NumOps = MI.getNumOperands();
-  unsigned NumSrc = MI.getNumOperands() - 1;
-  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
-
-  Register ResultReg = MIRBuilder.buildZExt(DstTy, Src1).getReg(0);
-
-  for (unsigned I = 2; I != NumOps; ++I) {
-    const unsigned Offset = (I - 1) * PartSize;
-
+  // Unmerge the original values to the GCD type, and recombine to the next
+  // multiple greater than the original type.
+  //
+  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
+  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
+  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
+  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
+  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
+  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
+  // %12:_(s12) = G_MERGE_VALUES %10, %11
+  //
+  // Padding with undef if necessary:
+  //
+  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
+  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
+  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
+  // %7:_(s2) = G_IMPLICIT_DEF
+  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
+  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
+  // %10:_(s12) = G_MERGE_VALUES %8, %9
+
+  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
+  LLT GCDTy = LLT::scalar(GCD);
+
+  SmallVector<Register, 8> Parts;
+  SmallVector<Register, 8> NewMergeRegs;
+  SmallVector<Register, 8> Unmerges;
+  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
+
+  // Decompose the original operands if they don't evenly divide.
+  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
     Register SrcReg = MI.getOperand(I).getReg();
-    assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
+    if (GCD == SrcSize) {
+      Unmerges.push_back(SrcReg);
+    } else {
+      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
+        Unmerges.push_back(Unmerge.getReg(J));
+    }
+  }
 
-    auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);
+  // Pad with undef to the next size that is a multiple of the requested size.
+  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
+    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
+      Unmerges.push_back(UndefReg);
+  }
 
-    Register NextResult = I + 1 == NumOps ? DstReg :
-      MRI.createGenericVirtualRegister(DstTy);
+  const int PartsPerGCD = WideSize / GCD;
 
-    auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
-    auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
-    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
-    ResultReg = NextResult;
+  // Build merges of each piece.
+  ArrayRef<Register> Slicer(Unmerges);
+  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
+    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
+    NewMergeRegs.push_back(Merge.getReg(0));
+  }
+
+  // A truncate may be necessary if the requested type doesn't evenly divide the
+  // original result type.
+  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
+    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
+  } else {
+    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
+    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
   }
 
   MI.eraseFromParent();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 8d94454b092a4..b7a73326b85c6 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -568,6 +568,7 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+  assert(TmpVec.size() > 1);
   return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
 }
 
@@ -577,6 +578,7 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+  assert(TmpVec.size() > 1);
   return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
 }
 
@@ -595,6 +597,7 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
   // we need some temporary storage for the DstOp objects. Here we use a
   // sufficiently large SmallVector to not go through the heap.
   SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+  assert(TmpVec.size() > 1);
   return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
index 20b65021b3103..7c5f9e7566cb1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir
@@ -1,5 +1,11 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=2 -pass-remarks-missed='gisel*' -o - %s 2> %t  | FileCheck %s
+# RUN: FileCheck -check-prefix=ERR %s < %t
+
+# ERR-NOT: remark:
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %197:_(s136) = G_INSERT %209:_, %206:_(s8), 128 (in function: test_merge_s68_s17_s17_s17_s17)
+# ERR-NOT: remark:
+
 
 ---
 name: test_merge_p1_s8
@@ -120,29 +126,34 @@ body: |
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C4]]
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C3]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C5]]
+    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]]
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C5]]
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]]
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR1]](s32)
-    ; CHECK: $vgpr0 = COPY [[COPY8]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]]
+    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C5]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C6]]
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[MV]](s32)
+    ; CHECK: $vgpr0 = COPY [[COPY4]](s32)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s8) = G_CONSTANT i8 2
@@ -250,56 +261,66 @@ body: |
     ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C7]]
-    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C6]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C8]]
+    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C8]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C8]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]]
-    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C9]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C8]]
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C9]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C8]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C9]]
     ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]]
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C8]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC3]]
+    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C8]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C9]]
     ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[AND5]](s32)
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[OR1]](s32)
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[COPY11]]
-    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C7]]
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C8]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32)
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[COPY15]]
-    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C12]](s32)
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C7]]
-    ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C8]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32)
-    ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
-    ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[COPY19]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[OR4]](s32)
-    ; CHECK: S_NOP 0, implicit [[TRUNC]](s24)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC4]]
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[TRUNC]]
+    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C12]](s32)
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C8]]
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C9]]
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[AND8]](s32)
+    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND7]], [[TRUNC6]]
+    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C13]](s32)
+    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]]
+    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]]
+    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32)
+    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC7]]
+    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C14]](s32)
+    ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C8]]
+    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C9]]
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[AND12]](s32)
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC8]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR5]](s16)
+    ; CHECK: [[TRUNC9:%[0-9]+]]:_(s24) = G_TRUNC [[MV]](s32)
+    ; CHECK: S_NOP 0, implicit [[TRUNC9]](s24)
     %0:_(s4) = G_CONSTANT i4 0
     %1:_(s4) = G_CONSTANT i4 1
     %2:_(s4) = G_CONSTANT i4 2
@@ -322,65 +343,66 @@ body: |
     ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
     ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
-    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 268435455
-    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C7]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C8]]
-    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C7]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
+    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C8]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C9]]
+    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C9]]
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]]
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C10]](s32)
-    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C8]]
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C9]]
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C10]]
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
+    ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
+    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
+    ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C9]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C10]]
     ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]]
-    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C11]](s32)
-    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C8]]
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C9]]
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC3]]
+    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C12]](s32)
+    ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C9]]
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
+    ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C10]]
     ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[AND5]](s32)
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[OR1]](s32)
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32)
-    ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[COPY11]]
-    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C12]](s32)
-    ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C8]]
-    ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32)
-    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C9]]
-    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32)
-    ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[OR2]](s32)
-    ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32)
-    ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[COPY15]]
-    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C13]](s32)
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C8]]
-    ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
-    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C9]]
-    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32)
-    ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[OR3]](s32)
-    ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32)
-    ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[COPY19]]
-    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C14]](s32)
-    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C8]]
-    ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C9]]
-    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[AND11]](s32)
-    ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[OR4]](s32)
-    ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32)
-    ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY22]], [[COPY23]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s28) = G_TRUNC [[OR5]](s32)
-    ; CHECK: S_NOP 0, implicit [[TRUNC]](s28)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC4]]
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32)
+    ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[TRUNC]]
+    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C13]](s32)
+    ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]]
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32)
+    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C10]]
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[AND8]](s32)
+    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND7]], [[TRUNC6]]
+    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C14]](s32)
+    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C9]]
+    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
+    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C10]]
+    ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32)
+    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32)
+    ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC7]]
+    ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C15]](s32)
+    ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C9]]
+    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C10]]
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[AND12]](s32)
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32)
+    ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC8]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR2]](s16), [[OR5]](s16)
+    ; CHECK: [[TRUNC9:%[0-9]+]]:_(s28) = G_TRUNC [[MV]](s32)
+    ; CHECK: S_NOP 0, implicit [[TRUNC9]](s28)
     %0:_(s4) = G_CONSTANT i4 0
     %1:_(s4) = G_CONSTANT i4 1
     %2:_(s4) = G_CONSTANT i4 2
@@ -548,6 +570,7 @@ body: |
     ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
     ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
     ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
     ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C7]](s32)
     ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
@@ -583,9 +606,18 @@ body: |
     ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32)
     ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC6]]
     ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[C6]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[TRUNC7]](s16)
-    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64)
-    ; CHECK: S_NOP 0, implicit [[TRUNC8]](s56)
+    ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[TRUNC]]
+    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C13]](s32)
+    ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C9]]
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+    ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C10]]
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32)
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND9]], [[TRUNC8]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16)
+    ; CHECK: [[TRUNC9:%[0-9]+]]:_(s56) = G_TRUNC [[MV]](s64)
+    ; CHECK: S_NOP 0, implicit [[TRUNC9]](s56)
     %0:_(s8) = G_CONSTANT i8 0
     %1:_(s8) = G_CONSTANT i8 1
     %2:_(s8) = G_CONSTANT i8 2
@@ -596,3 +628,160 @@ body: |
     %7:_(s56) = G_MERGE_VALUES %0, %1, %2, %3, %4, %5, %6
     S_NOP 0, implicit %7
 ...
+
+---
+name: test_merge_s68_s17_s17_s17_s17
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_merge_s68_s17_s17_s17_s17
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC %142(s16)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC %143(s16)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[C]](s32)
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s1) = G_TRUNC %144(s16)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s1) = G_TRUNC %145(s16)
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s17) = G_TRUNC [[C1]](s32)
+    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s1) = G_TRUNC %146(s16)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s1) = G_TRUNC %147(s16)
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s17) = G_TRUNC [[C2]](s32)
+    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: [[TRUNC9:%[0-9]+]]:_(s17) = G_TRUNC [[C3]](s32)
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s272) = G_ZEXT [[TRUNC2]](s17)
+    ; CHECK: [[C4:%[0-9]+]]:_(s272) = G_CONSTANT i272 15
+    ; CHECK: [[SHL:%[0-9]+]]:_(s272) = G_SHL [[ZEXT]], [[C4]](s272)
+    ; CHECK: [[OR:%[0-9]+]]:_(s272) = G_OR [[ZEXT]], [[SHL]]
+    ; CHECK: [[C5:%[0-9]+]]:_(s272) = G_CONSTANT i272 30
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s272) = G_SHL [[OR]], [[C5]](s272)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s272) = G_OR [[OR]], [[SHL1]]
+    ; CHECK: [[C6:%[0-9]+]]:_(s272) = G_CONSTANT i272 45
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s272) = G_SHL [[OR1]], [[C6]](s272)
+    ; CHECK: [[OR2:%[0-9]+]]:_(s272) = G_OR [[OR1]], [[SHL2]]
+    ; CHECK: [[C7:%[0-9]+]]:_(s272) = G_CONSTANT i272 60
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s272) = G_SHL [[OR2]], [[C7]](s272)
+    ; CHECK: [[OR3:%[0-9]+]]:_(s272) = G_OR [[OR2]], [[SHL3]]
+    ; CHECK: [[C8:%[0-9]+]]:_(s272) = G_CONSTANT i272 75
+    ; CHECK: [[SHL4:%[0-9]+]]:_(s272) = G_SHL [[OR3]], [[C8]](s272)
+    ; CHECK: [[OR4:%[0-9]+]]:_(s272) = G_OR [[OR3]], [[SHL4]]
+    ; CHECK: [[C9:%[0-9]+]]:_(s272) = G_CONSTANT i272 90
+    ; CHECK: [[SHL5:%[0-9]+]]:_(s272) = G_SHL [[OR4]], [[C9]](s272)
+    ; CHECK: [[OR5:%[0-9]+]]:_(s272) = G_OR [[OR4]], [[SHL5]]
+    ; CHECK: [[C10:%[0-9]+]]:_(s272) = G_CONSTANT i272 105
+    ; CHECK: [[SHL6:%[0-9]+]]:_(s272) = G_SHL [[OR5]], [[C10]](s272)
+    ; CHECK: [[OR6:%[0-9]+]]:_(s272) = G_OR [[OR5]], [[SHL6]]
+    ; CHECK: [[C11:%[0-9]+]]:_(s272) = G_CONSTANT i272 120
+    ; CHECK: [[SHL7:%[0-9]+]]:_(s272) = G_SHL [[OR6]], [[C11]](s272)
+    ; CHECK: [[OR7:%[0-9]+]]:_(s272) = G_OR [[OR6]], [[SHL7]]
+    ; CHECK: [[C12:%[0-9]+]]:_(s272) = G_CONSTANT i272 135
+    ; CHECK: [[SHL8:%[0-9]+]]:_(s272) = G_SHL [[OR7]], [[C12]](s272)
+    ; CHECK: [[OR8:%[0-9]+]]:_(s272) = G_OR [[OR7]], [[SHL8]]
+    ; CHECK: [[C13:%[0-9]+]]:_(s272) = G_CONSTANT i272 150
+    ; CHECK: [[SHL9:%[0-9]+]]:_(s272) = G_SHL [[OR8]], [[C13]](s272)
+    ; CHECK: [[OR9:%[0-9]+]]:_(s272) = G_OR [[OR8]], [[SHL9]]
+    ; CHECK: [[C14:%[0-9]+]]:_(s272) = G_CONSTANT i272 165
+    ; CHECK: [[SHL10:%[0-9]+]]:_(s272) = G_SHL [[OR9]], [[C14]](s272)
+    ; CHECK: [[OR10:%[0-9]+]]:_(s272) = G_OR [[OR9]], [[SHL10]]
+    ; CHECK: [[C15:%[0-9]+]]:_(s272) = G_CONSTANT i272 180
+    ; CHECK: [[SHL11:%[0-9]+]]:_(s272) = G_SHL [[OR10]], [[C15]](s272)
+    ; CHECK: [[OR11:%[0-9]+]]:_(s272) = G_OR [[OR10]], [[SHL11]]
+    ; CHECK: [[C16:%[0-9]+]]:_(s272) = G_CONSTANT i272 195
+    ; CHECK: [[SHL12:%[0-9]+]]:_(s272) = G_SHL [[OR11]], [[C16]](s272)
+    ; CHECK: [[OR12:%[0-9]+]]:_(s272) = G_OR [[OR11]], [[SHL12]]
+    ; CHECK: [[C17:%[0-9]+]]:_(s272) = G_CONSTANT i272 210
+    ; CHECK: [[SHL13:%[0-9]+]]:_(s272) = G_SHL [[OR12]], [[C17]](s272)
+    ; CHECK: [[OR13:%[0-9]+]]:_(s272) = G_OR [[OR12]], [[SHL13]]
+    ; CHECK: [[C18:%[0-9]+]]:_(s272) = G_CONSTANT i272 225
+    ; CHECK: [[SHL14:%[0-9]+]]:_(s272) = G_SHL [[OR13]], [[C18]](s272)
+    ; CHECK: [[OR14:%[0-9]+]]:_(s272) = G_OR [[OR13]], [[SHL14]]
+    ; CHECK: [[C19:%[0-9]+]]:_(s272) = G_CONSTANT i272 240
+    ; CHECK: [[TRUNC10:%[0-9]+]]:_(s32) = G_TRUNC [[C19]](s272)
+    ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
+    ; CHECK: [[UV:%[0-9]+]]:_(s136), [[UV1:%[0-9]+]]:_(s136) = G_UNMERGE_VALUES [[OR14]](s272)
+    ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC10]], [[C20]]
+    ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C20]], [[TRUNC10]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC10]](s32), [[C20]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC10]](s32), [[C]]
+    ; CHECK: [[SHL15:%[0-9]+]]:_(s136) = G_SHL [[UV1]], [[TRUNC10]](s32)
+    ; CHECK: [[SHL16:%[0-9]+]]:_(s136) = G_SHL [[UV1]], [[TRUNC10]](s32)
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s136) = G_LSHR [[UV]], [[SUB1]](s32)
+    ; CHECK: [[OR15:%[0-9]+]]:_(s136) = G_OR [[SHL16]], [[LSHR]]
+    ; CHECK: [[C21:%[0-9]+]]:_(s136) = G_CONSTANT i136 0
+    ; CHECK: [[SHL17:%[0-9]+]]:_(s136) = G_SHL [[UV]], [[SUB]](s32)
+    ; CHECK: [[SELECT:%[0-9]+]]:_(s136) = G_SELECT [[ICMP]](s1), [[SHL15]], [[C21]]
+    ; CHECK: [[SELECT1:%[0-9]+]]:_(s136) = G_SELECT [[ICMP]](s1), [[OR15]], [[SHL17]]
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[UV1]](s136), 0
+    ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[UV1]](s136), 64
+    ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s8) = G_EXTRACT [[UV1]](s136), 128
+    ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[SELECT1]](s136), 0
+    ; CHECK: [[EXTRACT4:%[0-9]+]]:_(s64) = G_EXTRACT [[SELECT1]](s136), 64
+    ; CHECK: [[EXTRACT5:%[0-9]+]]:_(s8) = G_EXTRACT [[SELECT1]](s136), 128
+    ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[EXTRACT]], [[EXTRACT3]]
+    ; CHECK: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[EXTRACT4]]
+    ; CHECK: [[SELECT4:%[0-9]+]]:_(s8) = G_SELECT [[ICMP1]](s1), [[EXTRACT2]], [[EXTRACT5]]
+    ; CHECK: [[DEF:%[0-9]+]]:_(s136) = G_IMPLICIT_DEF
+    ; CHECK: [[INSERT:%[0-9]+]]:_(s136) = G_INSERT [[DEF]], [[SELECT2]](s64), 0
+    ; CHECK: [[INSERT1:%[0-9]+]]:_(s136) = G_INSERT [[INSERT]], [[SELECT3]](s64), 64
+    ; CHECK: [[INSERT2:%[0-9]+]]:_(s136) = G_INSERT [[INSERT1]], [[SELECT4]](s8), 128
+    ; CHECK: [[MV:%[0-9]+]]:_(s272) = G_MERGE_VALUES [[SELECT]](s136), [[INSERT2]](s136)
+    ; CHECK: [[EXTRACT6:%[0-9]+]]:_(s64) = G_EXTRACT [[OR14]](s272), 0
+    ; CHECK: [[EXTRACT7:%[0-9]+]]:_(s64) = G_EXTRACT [[OR14]](s272), 64
+    ; CHECK: [[EXTRACT8:%[0-9]+]]:_(s64) = G_EXTRACT [[OR14]](s272), 128
+    ; CHECK: [[EXTRACT9:%[0-9]+]]:_(s64) = G_EXTRACT [[OR14]](s272), 192
+    ; CHECK: [[EXTRACT10:%[0-9]+]]:_(s16) = G_EXTRACT [[OR14]](s272), 256
+    ; CHECK: [[EXTRACT11:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s272), 0
+    ; CHECK: [[EXTRACT12:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s272), 64
+    ; CHECK: [[EXTRACT13:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s272), 128
+    ; CHECK: [[EXTRACT14:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s272), 192
+    ; CHECK: [[EXTRACT15:%[0-9]+]]:_(s16) = G_EXTRACT [[MV]](s272), 256
+    ; CHECK: [[OR16:%[0-9]+]]:_(s64) = G_OR [[EXTRACT6]], [[EXTRACT11]]
+    ; CHECK: [[OR17:%[0-9]+]]:_(s64) = G_OR [[EXTRACT7]], [[EXTRACT12]]
+    ; CHECK: [[OR18:%[0-9]+]]:_(s64) = G_OR [[EXTRACT8]], [[EXTRACT13]]
+    ; CHECK: [[OR19:%[0-9]+]]:_(s64) = G_OR [[EXTRACT9]], [[EXTRACT14]]
+    ; CHECK: [[OR20:%[0-9]+]]:_(s16) = G_OR [[EXTRACT10]], [[EXTRACT15]]
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF
+    ; CHECK: [[TRUNC11:%[0-9]+]]:_(s272) = G_TRUNC [[DEF1]](s512)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC11]](s272)
+    ; CHECK: [[INSERT3:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT]], [[OR16]](s64), 0
+    ; CHECK: [[TRUNC12:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT3]](s512)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC12]](s272)
+    ; CHECK: [[INSERT4:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT1]], [[OR17]](s64), 64
+    ; CHECK: [[TRUNC13:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT4]](s512)
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC13]](s272)
+    ; CHECK: [[INSERT5:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT2]], [[OR18]](s64), 128
+    ; CHECK: [[TRUNC14:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT5]](s512)
+    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC14]](s272)
+    ; CHECK: [[INSERT6:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT3]], [[OR19]](s64), 192
+    ; CHECK: [[TRUNC15:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT6]](s512)
+    ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s512) = G_ANYEXT [[TRUNC15]](s272)
+    ; CHECK: [[INSERT7:%[0-9]+]]:_(s512) = G_INSERT [[ANYEXT4]], [[OR20]](s16), 256
+    ; CHECK: [[TRUNC16:%[0-9]+]]:_(s272) = G_TRUNC [[INSERT7]](s512)
+    ; CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[TRUNC16]](s272)
+    ; CHECK: [[TRUNC17:%[0-9]+]]:_(s1) = G_TRUNC [[UV2]](s16)
+    ; CHECK: [[UV19:%[0-9]+]]:_(s1), [[UV20:%[0-9]+]]:_(s1), [[UV21:%[0-9]+]]:_(s1), [[UV22:%[0-9]+]]:_(s1), [[UV23:%[0-9]+]]:_(s1), [[UV24:%[0-9]+]]:_(s1), [[UV25:%[0-9]+]]:_(s1), [[UV26:%[0-9]+]]:_(s1), [[UV27:%[0-9]+]]:_(s1), [[UV28:%[0-9]+]]:_(s1), [[UV29:%[0-9]+]]:_(s1), [[UV30:%[0-9]+]]:_(s1), [[UV31:%[0-9]+]]:_(s1), [[UV32:%[0-9]+]]:_(s1), [[UV33:%[0-9]+]]:_(s1), [[UV34:%[0-9]+]]:_(s1), [[UV35:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[TRUNC5]](s17)
+    ; CHECK: [[TRUNC18:%[0-9]+]]:_(s1) = G_TRUNC [[UV3]](s16)
+    ; CHECK: [[UV36:%[0-9]+]]:_(s1), [[UV37:%[0-9]+]]:_(s1), [[UV38:%[0-9]+]]:_(s1), [[UV39:%[0-9]+]]:_(s1), [[UV40:%[0-9]+]]:_(s1), [[UV41:%[0-9]+]]:_(s1), [[UV42:%[0-9]+]]:_(s1), [[UV43:%[0-9]+]]:_(s1), [[UV44:%[0-9]+]]:_(s1), [[UV45:%[0-9]+]]:_(s1), [[UV46:%[0-9]+]]:_(s1), [[UV47:%[0-9]+]]:_(s1), [[UV48:%[0-9]+]]:_(s1), [[UV49:%[0-9]+]]:_(s1), [[UV50:%[0-9]+]]:_(s1), [[UV51:%[0-9]+]]:_(s1), [[UV52:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[TRUNC8]](s17)
+    ; CHECK: [[TRUNC19:%[0-9]+]]:_(s1) = G_TRUNC [[UV4]](s16)
+    ; CHECK: [[UV53:%[0-9]+]]:_(s1), [[UV54:%[0-9]+]]:_(s1), [[UV55:%[0-9]+]]:_(s1), [[UV56:%[0-9]+]]:_(s1), [[UV57:%[0-9]+]]:_(s1), [[UV58:%[0-9]+]]:_(s1), [[UV59:%[0-9]+]]:_(s1), [[UV60:%[0-9]+]]:_(s1), [[UV61:%[0-9]+]]:_(s1), [[UV62:%[0-9]+]]:_(s1), [[UV63:%[0-9]+]]:_(s1), [[UV64:%[0-9]+]]:_(s1), [[UV65:%[0-9]+]]:_(s1), [[UV66:%[0-9]+]]:_(s1), [[UV67:%[0-9]+]]:_(s1), [[UV68:%[0-9]+]]:_(s1), [[UV69:%[0-9]+]]:_(s1) = G_UNMERGE_VALUES [[TRUNC9]](s17)
+    ; CHECK: [[TRUNC20:%[0-9]+]]:_(s1) = G_TRUNC [[UV5]](s16)
+    ; CHECK: [[DEF2:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF
+    ; CHECK: [[TRUNC21:%[0-9]+]]:_(s1) = G_TRUNC [[UV6]](s16)
+    ; CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC17]](s1), [[TRUNC18]](s1), [[TRUNC19]](s1), [[TRUNC20]](s1), [[TRUNC21]](s1), %14(s1), %15(s1), %16(s1), %17(s1), %18(s1), %19(s1), [[TRUNC]](s1), [[TRUNC1]](s1), [[TRUNC3]](s1), [[TRUNC4]](s1), [[TRUNC6]](s1), [[TRUNC7]](s1), [[UV19]](s1), [[UV20]](s1), [[UV21]](s1), [[UV22]](s1), [[UV23]](s1), [[UV24]](s1), [[UV25]](s1), [[UV26]](s1), [[UV27]](s1), [[UV28]](s1), [[UV29]](s1), [[UV30]](s1), [[UV31]](s1), [[UV32]](s1), [[UV33]](s1)
+    ; CHECK: [[TRUNC22:%[0-9]+]]:_(s1) = G_TRUNC [[UV7]](s16)
+    ; CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV34]](s1), [[UV35]](s1), [[UV36]](s1), [[UV37]](s1), [[UV38]](s1), [[UV39]](s1), [[UV40]](s1), [[UV41]](s1), [[UV42]](s1), [[UV43]](s1), [[UV44]](s1), [[UV45]](s1), [[UV46]](s1), [[UV47]](s1), [[UV48]](s1), [[UV49]](s1), [[UV50]](s1), [[UV51]](s1), [[UV52]](s1), [[UV53]](s1), [[UV54]](s1), [[UV55]](s1), [[UV56]](s1), [[UV57]](s1), [[UV58]](s1), [[UV59]](s1), [[UV60]](s1), [[UV61]](s1), [[UV62]](s1), [[UV63]](s1), [[UV64]](s1), [[UV65]](s1)
+    ; CHECK: [[TRUNC23:%[0-9]+]]:_(s1) = G_TRUNC [[UV8]](s16)
+    ; CHECK: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV66]](s1), [[UV67]](s1), [[UV68]](s1), [[UV69]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1), [[DEF2]](s1)
+    ; CHECK: [[TRUNC24:%[0-9]+]]:_(s1) = G_TRUNC [[UV9]](s16)
+    ; CHECK: [[MV4:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MV1]](s32), [[MV2]](s32), [[MV3]](s32)
+    ; CHECK: [[TRUNC25:%[0-9]+]]:_(s1) = G_TRUNC [[UV10]](s16)
+    ; CHECK: [[TRUNC26:%[0-9]+]]:_(s68) = G_TRUNC [[MV4]](s96)
+    ; CHECK: [[TRUNC27:%[0-9]+]]:_(s1) = G_TRUNC [[UV11]](s16)
+    ; CHECK: S_NOP 0, implicit [[TRUNC26]](s68)
+    ; CHECK: [[TRUNC28:%[0-9]+]]:_(s1) = G_TRUNC [[UV12]](s16)
+    %0:_(s17) = G_CONSTANT i17 0
+    %1:_(s17) = G_CONSTANT i17 1
+    %2:_(s17) = G_CONSTANT i17 2
+    %3:_(s17) = G_CONSTANT i17 3
+    %4:_(s68) = G_MERGE_VALUES %0, %1, %2, %3
+    S_NOP 0, implicit %4
+...
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 608c0ddf08475..2776f75fe1af4 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -908,8 +908,12 @@ TEST_F(GISelMITest, LowerMergeValues) {
   if (!TM)
     return;
 
+  const LLT S32 = LLT::scalar(32);
   const LLT S24 = LLT::scalar(24);
+  const LLT S21 = LLT::scalar(21);
+  const LLT S16 = LLT::scalar(16);
   const LLT S9 = LLT::scalar(9);
+  const LLT S8 = LLT::scalar(8);
   const LLT S3 = LLT::scalar(3);
 
   DefineLegalizerInfo(A, {
@@ -925,13 +929,80 @@ TEST_F(GISelMITest, LowerMergeValues) {
   // 24 = 3 3 3   3 3 3   3 3
   //     => 9
   //
-  // This can do 2 merges for the first parts, but has 2 leftover operands.
-  SmallVector<Register, 7> MergeOps;
+  // This can do 3 merges, but need an extra implicit_def.
+  SmallVector<Register, 8> Merge0Ops;
   for (int I = 0; I != 8; ++I)
-    MergeOps.push_back(B.buildConstant(S3, I).getReg(0));
+    Merge0Ops.push_back(B.buildConstant(S3, I).getReg(0));
 
-  auto Merge = B.buildMerge(S24, MergeOps);
-  EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize,
-            Helper.lower(*Merge, 1, S9));
+  auto Merge0 = B.buildMerge(S24, Merge0Ops);
+
+  // 21 = 3 3 3   3 3 3   3
+  //     => 9, 2 extra implicit_def needed
+  //
+  SmallVector<Register, 8> Merge1Ops;
+  for (int I = 0; I != 7; ++I)
+    Merge1Ops.push_back(B.buildConstant(S3, I).getReg(0));
+
+  auto Merge1 = B.buildMerge(S21, Merge1Ops);
+
+  SmallVector<Register, 8> Merge2Ops;
+  for (int I = 0; I != 2; ++I)
+    Merge2Ops.push_back(B.buildConstant(S8, I).getReg(0));
+
+    auto Merge2 = B.buildMerge(S16, Merge2Ops);
+
+
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.widenScalar(*Merge0, 1, S9));
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.widenScalar(*Merge1, 1, S9));
+
+  // Request a source size greater than the original destination size.
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.widenScalar(*Merge2, 1, S32));
+
+  auto CheckStr = R"(
+  CHECK: [[K0:%[0-9]+]]:_(s3) = G_CONSTANT i3 0
+  CHECK-NEXT: [[K1:%[0-9]+]]:_(s3) = G_CONSTANT i3 1
+  CHECK-NEXT: [[K2:%[0-9]+]]:_(s3) = G_CONSTANT i3 2
+  CHECK-NEXT: [[K3:%[0-9]+]]:_(s3) = G_CONSTANT i3 3
+  CHECK-NEXT: [[K4:%[0-9]+]]:_(s3) = G_CONSTANT i3 -4
+  CHECK-NEXT: [[K5:%[0-9]+]]:_(s3) = G_CONSTANT i3 -3
+  CHECK-NEXT: [[K6:%[0-9]+]]:_(s3) = G_CONSTANT i3 -2
+  CHECK-NEXT: [[K7:%[0-9]+]]:_(s3) = G_CONSTANT i3 -1
+  CHECK-NEXT: [[IMPDEF0:%[0-9]+]]:_(s3) = G_IMPLICIT_DEF
+  CHECK-NEXT: [[MERGE0:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K0]]:_(s3), [[K1]]:_(s3), [[K2]]:_(s3)
+  CHECK-NEXT: [[MERGE1:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K3]]:_(s3), [[K4]]:_(s3), [[K5]]:_(s3)
+  CHECK-NEXT: [[MERGE2:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K6]]:_(s3), [[K7]]:_(s3), [[IMPDEF0]]:_(s3)
+  CHECK-NEXT: [[MERGE3:%[0-9]+]]:_(s27) = G_MERGE_VALUES [[MERGE0]]:_(s9), [[MERGE1]]:_(s9), [[MERGE2]]:_(s9)
+  CHECK-NEXT: (s24) = G_TRUNC [[MERGE3]]:_(s27)
+
+
+  CHECK: [[K8:%[0-9]+]]:_(s3) = G_CONSTANT i3 0
+  CHECK-NEXT: [[K9:%[0-9]+]]:_(s3) = G_CONSTANT i3 1
+  CHECK-NEXT: [[K10:%[0-9]+]]:_(s3) = G_CONSTANT i3 2
+  CHECK-NEXT: [[K11:%[0-9]+]]:_(s3) = G_CONSTANT i3 3
+  CHECK-NEXT: [[K12:%[0-9]+]]:_(s3) = G_CONSTANT i3 -4
+  CHECK-NEXT: [[K13:%[0-9]+]]:_(s3) = G_CONSTANT i3 -3
+  CHECK-NEXT: [[K14:%[0-9]+]]:_(s3) = G_CONSTANT i3 -2
+  CHECK-NEXT: [[IMPDEF1:%[0-9]+]]:_(s3) = G_IMPLICIT_DEF
+  CHECK-NEXT: [[MERGE4:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K8]]:_(s3), [[K9]]:_(s3), [[K10]]:_(s3)
+  CHECK-NEXT: [[MERGE5:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K11]]:_(s3), [[K12]]:_(s3), [[K13]]:_(s3)
+  CHECK-NEXT: [[MERGE6:%[0-9]+]]:_(s9) = G_MERGE_VALUES [[K14]]:_(s3), [[IMPDEF1]]:_(s3), [[IMPDEF1]]:_(s3)
+  CHECK-NEXT: [[MERGE7:%[0-9]+]]:_(s27) = G_MERGE_VALUES [[MERGE4]]:_(s9), [[MERGE5]]:_(s9), [[MERGE6]]:_(s9)
+  CHECK-NEXT: (s21) = G_TRUNC [[MERGE7]]:_(s27)
+
+
+  CHECK: [[K15:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  CHECK-NEXT: [[K16:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+  CHECK-NEXT: [[ZEXT_K15:[0-9]+]]:_(s32) = G_ZEXT [[K15]]:_(s8)
+  CHECK-NEXT: [[ZEXT_K16:[0-9]+]]:_(s32) = G_ZEXT [[K16]]:_(s8)
+  [[K16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT_K16]]:_, [[K16]]:_(s32)
+  [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT_K16]]:_, [[SHL]]:_
+  (s16) = G_TRUNC [[OR]]:_(s32)
+  )";
+
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
 }
 } // namespace

From 85d667fcb6f34175f9a9af87f58fe05b177e7d82 Mon Sep 17 00:00:00 2001
From: Sunil Srivastava <sunil_srivastava@playstation.sony.com>
Date: Wed, 17 Jul 2019 20:41:26 +0000
Subject: [PATCH 386/451] Renamed and changed the wording of warn_cconv_ignored

As discussed in D64780 the wording of this warning message is being
changed to say 'is not supported' instead of 'ignored', and the
diag ID itself is being changed to warn_cconv_unsupported.

llvm-svn: 366368
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  4 +--
 clang/lib/Sema/SemaDecl.cpp                   |  2 +-
 clang/lib/Sema/SemaDeclAttr.cpp               |  2 +-
 clang/lib/Sema/SemaType.cpp                   |  4 +--
 clang/test/CodeGen/aarch64-vpcs.c             |  4 +--
 clang/test/Frontend/macro_defined_type.cpp    |  2 +-
 clang/test/Sema/callingconv-iamcu.c           | 26 +++++++++----------
 clang/test/Sema/callingconv.c                 | 12 ++++-----
 clang/test/Sema/mrtd.c                        |  4 +--
 clang/test/Sema/pr25786.c                     |  4 +--
 clang/test/Sema/stdcall-fastcall-x64.c        | 22 ++++++++--------
 .../SemaCUDA/cuda-inherits-calling-conv.cu    |  2 +-
 clang/test/SemaCXX/borland-extensions.cpp     | 16 ++++++------
 clang/test/SemaCXX/cxx11-gnu-attrs.cpp        | 16 ++++++------
 .../test/SemaCXX/decl-microsoft-call-conv.cpp |  2 +-
 clang/test/SemaCXX/virtual-override-x64.cpp   |  6 ++---
 .../instantiate-function-params.cpp           |  2 +-
 17 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c68271b784da1..effcbad78b231 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2957,8 +2957,8 @@ def err_attribute_vecreturn_only_pod_record : Error<
 def err_cconv_change : Error<
   "function declared '%0' here was previously declared "
   "%select{'%2'|without calling convention}1">;
-def warn_cconv_ignored : Warning<
-  "%0 calling convention ignored %select{"
+def warn_cconv_unsupported : Warning<
+  "%0 calling convention is not supported %select{"
   // Use CallingConventionIgnoredReason Enum to specify these.
   "for this target"
   "|on variadic function"
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 0709c926ed90e..cc91ec5946844 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -3165,7 +3165,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
       // Calling Conventions on a Builtin aren't really useful and setting a
       // default calling convention and cdecl'ing some builtin redeclarations is
       // common, so warn and ignore the calling convention on the redeclaration.
-      Diag(New->getLocation(), diag::warn_cconv_ignored)
+      Diag(New->getLocation(), diag::warn_cconv_unsupported)
           << FunctionType::getNameForCallConv(NewTypeInfo.getCC())
           << (int)CallingConventionIgnoredReason::BuiltinFunction;
       NewTypeInfo = NewTypeInfo.withCallingConv(OldTypeInfo.getCC());
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 725a7770d67d7..ee06f8ae51147 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4669,7 +4669,7 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
     break;
 
   case TargetInfo::CCCR_Warning: {
-    Diag(Attrs.getLoc(), diag::warn_cconv_ignored)
+    Diag(Attrs.getLoc(), diag::warn_cconv_unsupported)
         << Attrs << (int)CallingConventionIgnoredReason::ForThisTarget;
 
     // This convention is not valid for the target. Use the default function or
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 12bad62d1f1dd..bb71db7609f55 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -7038,7 +7038,7 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
       // stdcall and fastcall are ignored with a warning for GCC and MS
       // compatibility.
       if (CC == CC_X86StdCall || CC == CC_X86FastCall)
-        return S.Diag(attr.getLoc(), diag::warn_cconv_ignored)
+        return S.Diag(attr.getLoc(), diag::warn_cconv_unsupported)
                << FunctionType::getNameForCallConv(CC)
                << (int)Sema::CallingConventionIgnoredReason::VariadicFunction;
 
@@ -7103,7 +7103,7 @@ void Sema::adjustMemberFunctionCC(QualType &T, bool IsStatic, bool IsCtorOrDtor,
     // Issue a warning on ignored calling convention -- except of __stdcall.
     // Again, this is what MS compiler does.
     if (CurCC != CC_X86StdCall)
-      Diag(Loc, diag::warn_cconv_ignored)
+      Diag(Loc, diag::warn_cconv_unsupported)
           << FunctionType::getNameForCallConv(CurCC)
           << (int)Sema::CallingConventionIgnoredReason::ConstructorDestructor;
   // Default adjustment.
diff --git a/clang/test/CodeGen/aarch64-vpcs.c b/clang/test/CodeGen/aarch64-vpcs.c
index a9edb7490c630..7e4f50cb87bad 100644
--- a/clang/test/CodeGen/aarch64-vpcs.c
+++ b/clang/test/CodeGen/aarch64-vpcs.c
@@ -2,7 +2,7 @@
 // RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -x c++ -o - %s | FileCheck %s -check-prefix=CHECKCXX
 // RUN: %clang_cc1 -triple i686-pc-linux-gnu -verify %s
 
-void __attribute__((aarch64_vector_pcs)) f(int *); // expected-warning {{'aarch64_vector_pcs' calling convention ignored for this target}}
+void __attribute__((aarch64_vector_pcs)) f(int *); // expected-warning {{'aarch64_vector_pcs' calling convention is not supported for this target}}
 
 // CHECKC: define void @g(
 // CHECKCXX: define void @_Z1gPi(
@@ -16,7 +16,7 @@ void g(int *a) {
 // CHECKC: declare aarch64_vector_pcs void @f(
 // CHECKCXX: declare aarch64_vector_pcs void @_Z1fPi
 
-void __attribute__((aarch64_vector_pcs)) h(int *a){ // expected-warning {{'aarch64_vector_pcs' calling convention ignored for this target}}
+void __attribute__((aarch64_vector_pcs)) h(int *a){ // expected-warning {{'aarch64_vector_pcs' calling convention is not supported for this target}}
 // CHECKC: define aarch64_vector_pcs void @h(
 // CHECKCXX: define aarch64_vector_pcs void @_Z1hPi(
   f(a);
diff --git a/clang/test/Frontend/macro_defined_type.cpp b/clang/test/Frontend/macro_defined_type.cpp
index 7a4427e42c099..d4f54b65a8d68 100644
--- a/clang/test/Frontend/macro_defined_type.cpp
+++ b/clang/test/Frontend/macro_defined_type.cpp
@@ -17,5 +17,5 @@ void Func() {
 // Added test for fix for P41835
 #define _LIBCPP_FLOAT_ABI __attribute__((pcs("aapcs")))
 struct A {
-  _LIBCPP_FLOAT_ABI int operator()() throw(); // expected-warning{{'pcs' calling convention ignored for this target}}
+  _LIBCPP_FLOAT_ABI int operator()() throw(); // expected-warning{{'pcs' calling convention is not supported for this target}}
 };
diff --git a/clang/test/Sema/callingconv-iamcu.c b/clang/test/Sema/callingconv-iamcu.c
index 2c99b029b6a28..248e98b0d01df 100644
--- a/clang/test/Sema/callingconv-iamcu.c
+++ b/clang/test/Sema/callingconv-iamcu.c
@@ -1,35 +1,35 @@
 // RUN: %clang_cc1 %s -fsyntax-only -triple i686-intel-elfiamcu -verify
 
-void __attribute__((fastcall)) foo(float *a) { // expected-warning {{'fastcall' calling convention ignored for this target}}
+void __attribute__((fastcall)) foo(float *a) { // expected-warning {{'fastcall' calling convention is not supported for this target}}
 }
 
-void __attribute__((stdcall)) bar(float *a) { // expected-warning {{'stdcall' calling convention ignored for this target}}
+void __attribute__((stdcall)) bar(float *a) { // expected-warning {{'stdcall' calling convention is not supported for this target}}
 }
 
 void __attribute__((fastcall(1))) baz(float *a) { // expected-error {{'fastcall' attribute takes no arguments}}
 }
 
-void __attribute__((fastcall)) test2(int a, ...) { // expected-warning {{'fastcall' calling convention ignored for this target}}
+void __attribute__((fastcall)) test2(int a, ...) { // expected-warning {{'fastcall' calling convention is not supported for this target}}
 }
-void __attribute__((stdcall)) test3(int a, ...) { // expected-warning {{'stdcall' calling convention ignored for this target}}
+void __attribute__((stdcall)) test3(int a, ...) { // expected-warning {{'stdcall' calling convention is not supported for this target}}
 }
-void __attribute__((thiscall)) test4(int a, ...) { // expected-warning {{'thiscall' calling convention ignored for this target}}
+void __attribute__((thiscall)) test4(int a, ...) { // expected-warning {{'thiscall' calling convention is not supported for this target}}
 }
 
 void __attribute__((cdecl)) ctest0() {}
 
 void __attribute__((cdecl(1))) ctest1(float x) {} // expected-error {{'cdecl' attribute takes no arguments}}
 
-void (__attribute__((fastcall)) *pfoo)(float*) = foo; // expected-warning {{'fastcall' calling convention ignored for this target}}
+void (__attribute__((fastcall)) *pfoo)(float*) = foo; // expected-warning {{'fastcall' calling convention is not supported for this target}}
 
-void (__attribute__((stdcall)) *pbar)(float*) = bar; // expected-warning {{'stdcall' calling convention ignored for this target}}
+void (__attribute__((stdcall)) *pbar)(float*) = bar; // expected-warning {{'stdcall' calling convention is not supported for this target}}
 
 void (*pctest0)() = ctest0;
 
 void ctest2() {}
 void (__attribute__((cdecl)) *pctest2)() = ctest2;
 
-typedef void (__attribute__((fastcall)) *Handler) (float *); // expected-warning {{'fastcall' calling convention ignored for this target}}
+typedef void (__attribute__((fastcall)) *Handler) (float *); // expected-warning {{'fastcall' calling convention is not supported for this target}}
 Handler H = foo;
 
 int __attribute__((pcs("aapcs", "aapcs"))) pcs1(void); // expected-error {{'pcs' attribute takes one argument}}
@@ -38,16 +38,16 @@ int __attribute__((pcs(pcs1))) pcs3(void); // expected-error {{'pcs' attribute r
                                            // expected-error {{invalid PCS type}}
 int __attribute__((pcs(0))) pcs4(void); // expected-error {{'pcs' attribute requires a string}}
 /* These are ignored because the target is i386 and not ARM */
-int __attribute__((pcs("aapcs"))) pcs5(void); // expected-warning {{'pcs' calling convention ignored for this target}}
-int __attribute__((pcs("aapcs-vfp"))) pcs6(void); // expected-warning {{'pcs' calling convention ignored for this target}}
+int __attribute__((pcs("aapcs"))) pcs5(void); // expected-warning {{'pcs' calling convention is not supported for this target}}
+int __attribute__((pcs("aapcs-vfp"))) pcs6(void); // expected-warning {{'pcs' calling convention is not supported for this target}}
 int __attribute__((pcs("foo"))) pcs7(void); // expected-error {{invalid PCS type}}
 
 void ctest3();
 void __attribute__((cdecl)) ctest3() {}
 
-typedef __attribute__((stdcall)) void (*PROC)(); // expected-warning {{'stdcall' calling convention ignored for this target}}
+typedef __attribute__((stdcall)) void (*PROC)(); // expected-warning {{'stdcall' calling convention is not supported for this target}}
 PROC __attribute__((cdecl)) ctest4(const char *x) {}
 
-void __attribute__((intel_ocl_bicc)) inteloclbifunc(float *a) {} // expected-warning {{'intel_ocl_bicc' calling convention ignored for this target}}
+void __attribute__((intel_ocl_bicc)) inteloclbifunc(float *a) {} // expected-warning {{'intel_ocl_bicc' calling convention is not supported for this target}}
 
-struct type_test {} __attribute__((stdcall)); // expected-warning {{'stdcall' calling convention ignored for this target}} expected-warning {{'stdcall' attribute only applies to functions and methods}}
+struct type_test {} __attribute__((stdcall)); // expected-warning {{'stdcall' calling convention is not supported for this target}} expected-warning {{'stdcall' attribute only applies to functions and methods}}
diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c
index e6d6ad2c12048..6273d04f21217 100644
--- a/clang/test/Sema/callingconv.c
+++ b/clang/test/Sema/callingconv.c
@@ -16,9 +16,9 @@ void __attribute__((fastcall)) test0() {
 void __attribute__((fastcall)) test1(void) {
 }
 
-void __attribute__((fastcall)) test2(int a, ...) { // expected-warning {{fastcall calling convention ignored on variadic function}}
+void __attribute__((fastcall)) test2(int a, ...) { // expected-warning {{fastcall calling convention is not supported on variadic function}}
 }
-void __attribute__((stdcall)) test3(int a, ...) { // expected-warning {{stdcall calling convention ignored on variadic function}}
+void __attribute__((stdcall)) test3(int a, ...) { // expected-warning {{stdcall calling convention is not supported on variadic function}}
 }
 void __attribute__((thiscall)) test4(int a, ...) { // expected-error {{variadic function cannot use thiscall calling convention}}
 }
@@ -47,11 +47,11 @@ int __attribute__((pcs(pcs1))) pcs3(void); // expected-error {{'pcs' attribute r
                                            // expected-error {{invalid PCS type}}
 int __attribute__((pcs(0))) pcs4(void); // expected-error {{'pcs' attribute requires a string}}
 /* These are ignored because the target is i386 and not ARM */
-int __attribute__((pcs("aapcs"))) pcs5(void); // expected-warning {{'pcs' calling convention ignored for this target}}
-int __attribute__((pcs("aapcs-vfp"))) pcs6(void); // expected-warning {{'pcs' calling convention ignored for this target}}
+int __attribute__((pcs("aapcs"))) pcs5(void); // expected-warning {{'pcs' calling convention is not supported for this target}}
+int __attribute__((pcs("aapcs-vfp"))) pcs6(void); // expected-warning {{'pcs' calling convention is not supported for this target}}
 int __attribute__((pcs("foo"))) pcs7(void); // expected-error {{invalid PCS type}}
 
-int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aarch64_vector_pcs' calling convention ignored for this target}}
+int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aarch64_vector_pcs' calling convention is not supported for this target}}
 
 // PR6361
 void ctest3();
@@ -69,4 +69,4 @@ void __attribute__((stdcall)) typedef_fun(int x) { } // expected-error {{functio
 
 struct type_test {} __attribute__((stdcall));  // expected-warning {{'stdcall' attribute only applies to functions and methods}}
 
-void __vectorcall __builtin_unreachable(); // expected-warning {{vectorcall calling convention ignored on builtin function}}
+void __vectorcall __builtin_unreachable(); // expected-warning {{vectorcall calling convention is not supported on builtin function}}
diff --git a/clang/test/Sema/mrtd.c b/clang/test/Sema/mrtd.c
index 7bdeb27293b78..0ce0888060643 100644
--- a/clang/test/Sema/mrtd.c
+++ b/clang/test/Sema/mrtd.c
@@ -12,7 +12,7 @@ void __attribute__((stdcall)) nonvariadic1(int a, int b, int c);
 void nonvariadic2(int a, int b, int c);
 void __attribute__((stdcall)) nonvariadic2(int a, int b, int c) { }
 
-// expected-warning@+2 {{stdcall calling convention ignored on variadic function}}
+// expected-warning@+2 {{stdcall calling convention is not supported on variadic function}}
 void variadic(int a, ...);
 void __attribute__((stdcall)) variadic(int a, ...);
 
@@ -33,6 +33,6 @@ __attribute__((cdecl)) extern void (*b)(int, ...);
 extern void (*c)(int, int);
 __attribute__((stdcall)) extern void (*c)(int, int);
 
-// expected-warning@+2 {{stdcall calling convention ignored on variadic function}}
+// expected-warning@+2 {{stdcall calling convention is not supported on variadic function}}
 extern void (*d)(int, ...);
 __attribute__((stdcall)) extern void (*d)(int, ...);
diff --git a/clang/test/Sema/pr25786.c b/clang/test/Sema/pr25786.c
index bfc2b35ede239..f79d8144716b1 100644
--- a/clang/test/Sema/pr25786.c
+++ b/clang/test/Sema/pr25786.c
@@ -2,8 +2,8 @@
 // RUN: %clang_cc1 -triple i686-unknown-linux-gnu -fsyntax-only -verify %s
 
 #if TEST
-void (__attribute__((regparm(3), stdcall)) *pf) (); //expected-warning {{'stdcall' calling convention ignored for this target}}
-void (__attribute__((regparm(2), stdcall)) foo)(int a) { //expected-warning {{'stdcall' calling convention ignored for this target}}
+void (__attribute__((regparm(3), stdcall)) *pf) (); //expected-warning {{'stdcall' calling convention is not supported for this target}}
+void (__attribute__((regparm(2), stdcall)) foo)(int a) { //expected-warning {{'stdcall' calling convention is not supported for this target}}
 }
 #else
 //expected-no-diagnostics
diff --git a/clang/test/Sema/stdcall-fastcall-x64.c b/clang/test/Sema/stdcall-fastcall-x64.c
index e2e39e434f810..335da4169352a 100644
--- a/clang/test/Sema/stdcall-fastcall-x64.c
+++ b/clang/test/Sema/stdcall-fastcall-x64.c
@@ -5,16 +5,16 @@ int __attribute__((stdcall)) var1; // expected-warning{{'stdcall' only applies t
 int __attribute__((fastcall)) var2; // expected-warning{{'fastcall' only applies to function types; type here is 'int'}}
 
 // Different CC qualifiers are not compatible
-void __attribute__((stdcall, fastcall)) foo3(void); // expected-warning{{'stdcall' calling convention ignored for this target}} expected-warning {{'fastcall' calling convention ignored for this target}}
-void __attribute__((stdcall)) foo4(); // expected-warning{{'stdcall' calling convention ignored for this target}}
-void __attribute__((fastcall)) foo4(void); // expected-warning {{'fastcall' calling convention ignored for this target}}
+void __attribute__((stdcall, fastcall)) foo3(void); // expected-warning{{'stdcall' calling convention is not supported for this target}} expected-warning {{'fastcall' calling convention is not supported for this target}}
+void __attribute__((stdcall)) foo4(); // expected-warning{{'stdcall' calling convention is not supported for this target}}
+void __attribute__((fastcall)) foo4(void); // expected-warning {{'fastcall' calling convention is not supported for this target}}
 
 // rdar://8876096
-void rdar8876096foo1(int i, int j) __attribute__((fastcall, cdecl)); // expected-warning{{'fastcall' calling convention ignored for this target}}
-void rdar8876096foo2(int i, int j) __attribute__((fastcall, stdcall)); // expected-warning{{'stdcall' calling convention ignored for this target}} expected-warning {{'fastcall' calling convention ignored for this target}}
-void rdar8876096foo3(int i, int j) __attribute__((fastcall, regparm(2))); // expected-warning {{'fastcall' calling convention ignored for this target}}
-void rdar8876096foo4(int i, int j) __attribute__((stdcall, cdecl)); // expected-warning{{'stdcall' calling convention ignored for this target}}
-void rdar8876096foo5(int i, int j) __attribute__((stdcall, fastcall)); // expected-warning{{'stdcall' calling convention ignored for this target}} expected-warning {{'fastcall' calling convention ignored for this target}}
-void rdar8876096foo6(int i, int j) __attribute__((cdecl, fastcall)); // expected-warning {{'fastcall' calling convention ignored for this target}}
-void rdar8876096foo7(int i, int j) __attribute__((cdecl, stdcall)); // expected-warning{{'stdcall' calling convention ignored for this target}}
-void rdar8876096foo8(int i, int j) __attribute__((regparm(2), fastcall)); // expected-warning {{'fastcall' calling convention ignored for this target}}
+void rdar8876096foo1(int i, int j) __attribute__((fastcall, cdecl)); // expected-warning{{'fastcall' calling convention is not supported for this target}}
+void rdar8876096foo2(int i, int j) __attribute__((fastcall, stdcall)); // expected-warning{{'stdcall' calling convention is not supported for this target}} expected-warning {{'fastcall' calling convention is not supported for this target}}
+void rdar8876096foo3(int i, int j) __attribute__((fastcall, regparm(2))); // expected-warning {{'fastcall' calling convention is not supported for this target}}
+void rdar8876096foo4(int i, int j) __attribute__((stdcall, cdecl)); // expected-warning{{'stdcall' calling convention is not supported for this target}}
+void rdar8876096foo5(int i, int j) __attribute__((stdcall, fastcall)); // expected-warning{{'stdcall' calling convention is not supported for this target}} expected-warning {{'fastcall' calling convention is not supported for this target}}
+void rdar8876096foo6(int i, int j) __attribute__((cdecl, fastcall)); // expected-warning {{'fastcall' calling convention is not supported for this target}}
+void rdar8876096foo7(int i, int j) __attribute__((cdecl, stdcall)); // expected-warning{{'stdcall' calling convention is not supported for this target}}
+void rdar8876096foo8(int i, int j) __attribute__((regparm(2), fastcall)); // expected-warning {{'fastcall' calling convention is not supported for this target}}
diff --git a/clang/test/SemaCUDA/cuda-inherits-calling-conv.cu b/clang/test/SemaCUDA/cuda-inherits-calling-conv.cu
index 881f2945b1b34..a6928e71f3ae0 100644
--- a/clang/test/SemaCUDA/cuda-inherits-calling-conv.cu
+++ b/clang/test/SemaCUDA/cuda-inherits-calling-conv.cu
@@ -24,7 +24,7 @@ struct Foo<T()> {};
 // expected-no-diagnostics
 #else
 // expected-error@+4 {{redefinition of 'Foo}}
-// expected-warning@+3 {{'__fastcall' calling convention ignored}}
+// expected-warning@+3 {{'__fastcall' calling convention is not supported}}
 #endif
 template <class T>
 struct Foo<T __fastcall()> {};
diff --git a/clang/test/SemaCXX/borland-extensions.cpp b/clang/test/SemaCXX/borland-extensions.cpp
index 31ebf03712924..a869f4ee60e90 100644
--- a/clang/test/SemaCXX/borland-extensions.cpp
+++ b/clang/test/SemaCXX/borland-extensions.cpp
@@ -7,21 +7,21 @@
 int dummy_function() { return 0; }
 
 // 2. test __pascal
-// expected-warning@+1 {{'_pascal' calling convention ignored for this target}}
+// expected-warning@+1 {{'_pascal' calling convention is not supported for this target}}
 int _pascal f2();
 
-// expected-warning@+1 {{'__pascal' calling convention ignored for this target}}
+// expected-warning@+1 {{'__pascal' calling convention is not supported for this target}}
 float __pascal gi2(int, int); 
-// expected-warning@+1 {{'__pascal' calling convention ignored for this target}}
+// expected-warning@+1 {{'__pascal' calling convention is not supported for this target}}
 template<typename T> T g2(T (__pascal * const )(int, int)) { return 0; }
 
 struct M {
-    // expected-warning@+1 {{'__pascal' calling convention ignored for this target}}
+    // expected-warning@+1 {{'__pascal' calling convention is not supported for this target}}
     int __pascal addP();
-    // expected-warning@+1 {{'__pascal' calling convention ignored for this target}}
+    // expected-warning@+1 {{'__pascal' calling convention is not supported for this target}}
     float __pascal subtractP(); 
 };
-// expected-warning@+1 {{'__pascal' calling convention ignored for this target}}
+// expected-warning@+1 {{'__pascal' calling convention is not supported for this target}}
 template<typename T> int h2(T (__pascal M::* const )()) { return 0; }
 void m2() {
     int i; float f;
@@ -34,9 +34,9 @@ void m2() {
 
 // 3. test other calling conventions
 int _cdecl fa3();
-// expected-warning@+1 {{'_fastcall' calling convention ignored for this target}}
+// expected-warning@+1 {{'_fastcall' calling convention is not supported for this target}}
 int _fastcall fc3();
-// expected-warning@+1 {{'_stdcall' calling convention ignored for this target}}
+// expected-warning@+1 {{'_stdcall' calling convention is not supported for this target}}
 int _stdcall fd3();
 
 // 4. test __uuidof()
diff --git a/clang/test/SemaCXX/cxx11-gnu-attrs.cpp b/clang/test/SemaCXX/cxx11-gnu-attrs.cpp
index 1e8ad1e495bbf..a91cb278b4d24 100644
--- a/clang/test/SemaCXX/cxx11-gnu-attrs.cpp
+++ b/clang/test/SemaCXX/cxx11-gnu-attrs.cpp
@@ -9,18 +9,18 @@ int [[gnu::unused]] attr_on_type;
 int *[[gnu::unused]] attr_on_ptr;
 // expected-warning@-1 {{attribute 'unused' ignored, because it cannot be applied to a type}}
 [[gnu::fastcall]] void pr17424_1();
-// expected-warning@-1 {{'fastcall' calling convention ignored for this target}}
+// expected-warning@-1 {{'fastcall' calling convention is not supported for this target}}
 [[gnu::fastcall]] [[gnu::stdcall]] void pr17424_2();
-// expected-warning@-1 {{'fastcall' calling convention ignored for this target}}
-// expected-warning@-2 {{'stdcall' calling convention ignored for this target}}
+// expected-warning@-1 {{'fastcall' calling convention is not supported for this target}}
+// expected-warning@-2 {{'stdcall' calling convention is not supported for this target}}
 [[gnu::fastcall]] __stdcall void pr17424_3();
-// expected-warning@-1 {{'fastcall' calling convention ignored for this target}}
-// expected-warning@-2 {{'__stdcall' calling convention ignored for this target}}
+// expected-warning@-1 {{'fastcall' calling convention is not supported for this target}}
+// expected-warning@-2 {{'__stdcall' calling convention is not supported for this target}}
 [[gnu::fastcall]] void pr17424_4() [[gnu::stdcall]];
-// expected-warning@-1 {{'fastcall' calling convention ignored for this target}}
-// expected-warning@-2 {{'stdcall' calling convention ignored for this target}}
+// expected-warning@-1 {{'fastcall' calling convention is not supported for this target}}
+// expected-warning@-2 {{'stdcall' calling convention is not supported for this target}}
 void pr17424_5 [[gnu::fastcall]]();
-// expected-warning@-1 {{'fastcall' calling convention ignored for this target}}
+// expected-warning@-1 {{'fastcall' calling convention is not supported for this target}}
 
 // Valid cases.
 
diff --git a/clang/test/SemaCXX/decl-microsoft-call-conv.cpp b/clang/test/SemaCXX/decl-microsoft-call-conv.cpp
index acd9b0720b620..a2a04ac9c8b99 100644
--- a/clang/test/SemaCXX/decl-microsoft-call-conv.cpp
+++ b/clang/test/SemaCXX/decl-microsoft-call-conv.cpp
@@ -77,7 +77,7 @@ struct S {
 
   // Structors can't be other than default in MS ABI environment
 #ifdef MSABI
-  __vectorcall S(); // expected-warning {{vectorcall calling convention ignored on constructor/destructor}}
+  __vectorcall S(); // expected-warning {{vectorcall calling convention is not supported on constructor/destructor}}
 #endif
 };
 
diff --git a/clang/test/SemaCXX/virtual-override-x64.cpp b/clang/test/SemaCXX/virtual-override-x64.cpp
index 5b9b2148f00a5..f3eaf11569c8f 100644
--- a/clang/test/SemaCXX/virtual-override-x64.cpp
+++ b/clang/test/SemaCXX/virtual-override-x64.cpp
@@ -6,7 +6,7 @@
 namespace PR14339 {
   class A {
   public:
-    virtual void __attribute__((thiscall)) f();	// expected-warning {{'thiscall' calling convention ignored for this target}}
+    virtual void __attribute__((thiscall)) f();	// expected-warning {{'thiscall' calling convention is not supported for this target}}
   };
 
   class B : public A {
@@ -16,7 +16,7 @@ namespace PR14339 {
 
   class C : public A {
   public:
-    void __attribute__((thiscall)) f();  // expected-warning {{'thiscall' calling convention ignored for this target}}
+    void __attribute__((thiscall)) f();  // expected-warning {{'thiscall' calling convention is not supported for this target}}
   };
 
   class D : public A {
@@ -26,7 +26,7 @@ namespace PR14339 {
 
   class E {
   public:
-    virtual void __attribute__((stdcall)) g();  // expected-warning {{'stdcall' calling convention ignored for this target}}
+    virtual void __attribute__((stdcall)) g();  // expected-warning {{'stdcall' calling convention is not supported for this target}}
   };
 
   class F : public E {
diff --git a/clang/test/SemaTemplate/instantiate-function-params.cpp b/clang/test/SemaTemplate/instantiate-function-params.cpp
index 7984e258653de..505034d004d19 100644
--- a/clang/test/SemaTemplate/instantiate-function-params.cpp
+++ b/clang/test/SemaTemplate/instantiate-function-params.cpp
@@ -88,7 +88,7 @@ namespace InstantiateFunctionTypedef {
     __attribute__((stdcall)) functype stdfunc1;
     stdfunctype stdfunc2;
 
-    __attribute__((pcs("aapcs"))) functype pcsfunc; // expected-warning {{'pcs' calling convention ignored for this target}}
+    __attribute__((pcs("aapcs"))) functype pcsfunc; // expected-warning {{'pcs' calling convention is not supported for this target}}
   };
 
   void f(X<int> x) {

From 90ba54bf67c4c134d000b064121789a32c0c6a73 Mon Sep 17 00:00:00 2001
From: Francis Visoiu Mistrih <francisvm@yahoo.com>
Date: Wed, 17 Jul 2019 20:46:09 +0000
Subject: [PATCH 387/451] [CodeGen][NFC] Simplify checks for stack protector
 index checking

Use `hasStackProtectorIndex()` instead of `getStackProtectorIndex() >=
0`.

llvm-svn: 366369
---
 llvm/lib/CodeGen/LocalStackSlotAllocation.cpp |  8 ++++----
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     | 16 +++++++---------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index bddd0c7732cdc..aa8f824c6b952 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -199,19 +199,19 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
   // Make sure that the stack protector comes before the local variables on the
   // stack.
   SmallSet<int, 16> ProtectedObjs;
-  if (MFI.getStackProtectorIndex() >= 0) {
+  if (MFI.hasStackProtectorIndex()) {
+    int StackProtectorFI = MFI.getStackProtectorIndex();
     StackObjSet LargeArrayObjs;
     StackObjSet SmallArrayObjs;
     StackObjSet AddrOfObjs;
 
-    AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset,
-                      StackGrowsDown, MaxAlign);
+    AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign);
 
     // Assign large stack objects first.
     for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
       if (MFI.isDeadObjectIndex(i))
         continue;
-      if (MFI.getStackProtectorIndex() == (int)i)
+      if (StackProtectorFI == (int)i)
         continue;
 
       switch (MFI.getObjectSSPLayout(i)) {
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 8e31c070714c8..dfbf665321ded 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -927,18 +927,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
   // Make sure that the stack protector comes before the local variables on the
   // stack.
   SmallSet<int, 16> ProtectedObjs;
-  if (MFI.getStackProtectorIndex() >= 0) {
+  if (MFI.hasStackProtectorIndex()) {
+    int StackProtectorFI = MFI.getStackProtectorIndex();
     StackObjSet LargeArrayObjs;
     StackObjSet SmallArrayObjs;
     StackObjSet AddrOfObjs;
 
-    AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown,
-                      Offset, MaxAlign, Skew);
+    AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
+                      Skew);
 
     // Assign large stack objects first.
     for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
-      if (MFI.isObjectPreAllocated(i) &&
-          MFI.getUseLocalStackAllocationBlock())
+      if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
         continue;
       if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
         continue;
@@ -946,8 +946,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
         continue;
       if (MFI.isDeadObjectIndex(i))
         continue;
-      if (MFI.getStackProtectorIndex() == (int)i ||
-          EHRegNodeFrameIndex == (int)i)
+      if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i)
         continue;
       if (MFI.getStackID(i) !=
           TargetStackID::Default) // Only allocate objects on the default stack.
@@ -990,8 +989,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
       continue;
     if (MFI.isDeadObjectIndex(i))
       continue;
-    if (MFI.getStackProtectorIndex() == (int)i ||
-        EHRegNodeFrameIndex == (int)i)
+    if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i)
       continue;
     if (ProtectedObjs.count(i))
       continue;

From 39fc2843e4eb07370d55f0a7a0db34d4bd6c9d5f Mon Sep 17 00:00:00 2001
From: Francis Visoiu Mistrih <francisvm@yahoo.com>
Date: Wed, 17 Jul 2019 20:46:16 +0000
Subject: [PATCH 388/451] [CodeGen] Add stack protector tests where the guard
 gets re-assigned

In preparation for a fix, add tests for multiple backends.

llvm-svn: 366370
---
 .../CodeGen/AArch64/stack-guard-reassign.ll   |  9 ++++
 .../CodeGen/AArch64/stack-guard-reassign.mir  | 34 +++++++++++++++
 .../test/CodeGen/AArch64/stack-guard-vaarg.ll | 41 +++++++++++++++++++
 llvm/test/CodeGen/ARM/stack-guard-reassign.ll | 14 +++++++
 .../CodeGen/Inputs/stack-guard-reassign.ll    | 21 ++++++++++
 .../CodeGen/PowerPC/stack-guard-reassign.ll   | 15 +++++++
 6 files changed, 134 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/stack-guard-reassign.ll
 create mode 100644 llvm/test/CodeGen/AArch64/stack-guard-reassign.mir
 create mode 100644 llvm/test/CodeGen/AArch64/stack-guard-vaarg.ll
 create mode 100644 llvm/test/CodeGen/ARM/stack-guard-reassign.ll
 create mode 100644 llvm/test/CodeGen/Inputs/stack-guard-reassign.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll

diff --git a/llvm/test/CodeGen/AArch64/stack-guard-reassign.ll b/llvm/test/CodeGen/AArch64/stack-guard-reassign.ll
new file mode 100644
index 0000000000000..a3b00fd2cca9c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-guard-reassign.ll
@@ -0,0 +1,9 @@
+; RUN: llc -O0 --frame-pointer=all -mtriple=aarch64-- -o - %S/../Inputs/stack-guard-reassign.ll | FileCheck %s
+
+; Verify that the offset assigned to the stack protector is at the top of the
+; frame, covering the locals.
+; CHECK-LABEL: fn:
+; CHECK:      add x8, sp, #24
+; CHECK-NEXT: adrp x9, __stack_chk_guard
+; CHECK-NEXT: ldr x9, [x9, :lo12:__stack_chk_guard]
+; CHECK-NEXT: str x9, [x8]
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir b/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir
new file mode 100644
index 0000000000000..296290e2b0f12
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir
@@ -0,0 +1,34 @@
+# RUN: llc -mtriple=arm64-apple-ios -start-before=localstackalloc -stop-after=prologepilog -o - %s | FileCheck %s
+
+--- |
+  @__stack_chk_guard = external global i8*
+  define i32 @main(i32, i8**) {
+    %StackGuardSlot = alloca i8*
+    unreachable
+  }
+...
+---
+name:            main
+tracksRegLiveness: true
+frameInfo:
+# CHECK: stackSize: 560
+  stackProtector:  '%stack.0.StackGuardSlot'
+stack:
+  - { id: 0, name: StackGuardSlot, size: 8, alignment: 8, stack-id: default }
+# Verify that the offset assigned to the stack protector is at the top of the
+# frame, covering the locals.
+# CHECK:       - { id: 0, name: StackGuardSlot, type: default, offset: -552, size: 8,
+# CHECK-NEXT:      alignment: 8, stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT:      local-offset: -8, debug-info-variable: '', debug-info-expression: '',
+# CHECK-NEXT:      debug-info-location: '' }
+  - { id: 1, size: 512, alignment: 1, stack-id: default }
+  - { id: 2, size: 4, alignment: 4, stack-id: default }
+body:             |
+  bb.0:
+    %25:gpr64common = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard)
+    STRXui killed %25, %stack.0.StackGuardSlot, 0 :: (volatile store 8 into %stack.0.StackGuardSlot)
+    %28:gpr64 = LDRXui %stack.0.StackGuardSlot, 0 :: (volatile load 8 from %stack.0.StackGuardSlot)
+    %29:gpr64common = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard)
+    RET_ReallyLR implicit undef $w0, implicit killed %28, implicit killed %29
+
+...
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-vaarg.ll b/llvm/test/CodeGen/AArch64/stack-guard-vaarg.ll
new file mode 100644
index 0000000000000..e083aa10e3334
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-guard-vaarg.ll
@@ -0,0 +1,41 @@
+; RUN: llc --frame-pointer=all -mtriple=aarch64-- < %s | FileCheck %s
+
+; PR25610: -fstack-protector places the canary in the wrong place on arm64 with
+;          va_args
+
+%struct.__va_list = type { i8*, i8*, i8*, i32, i32 }
+
+; CHECK-LABEL: test
+; CHECK: ldr [[GUARD:x[0-9]+]]{{.*}}:lo12:__stack_chk_guard]
+; Make sure the canary is placed relative to the frame pointer, not
+; the stack pointer.
+; CHECK: str [[GUARD]], [sp, #8]
+define void @test(i8* %i, ...) #0 {
+entry:
+  %buf = alloca [10 x i8], align 1
+  %ap = alloca %struct.__va_list, align 8
+  %tmp = alloca %struct.__va_list, align 8
+  %0 = getelementptr inbounds [10 x i8], [10 x i8]* %buf, i64 0, i64 0
+  call void @llvm.lifetime.start(i64 10, i8* %0)
+  %1 = bitcast %struct.__va_list* %ap to i8*
+  call void @llvm.lifetime.start(i64 32, i8* %1)
+  call void @llvm.va_start(i8* %1)
+  %2 = bitcast %struct.__va_list* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 32, i32 8, i1 false)
+  call void @baz(i8* %i, %struct.__va_list* nonnull %tmp)
+  call void @bar(i8* %0)
+  call void @llvm.va_end(i8* %1)
+  call void @llvm.lifetime.end(i64 32, i8* %1)
+  call void @llvm.lifetime.end(i64 10, i8* %0)
+  ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.va_start(i8*)
+declare void @baz(i8*, %struct.__va_list*)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+declare void @bar(i8*)
+declare void @llvm.va_end(i8*)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+attributes #0 = { noinline nounwind optnone ssp }
diff --git a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
new file mode 100644
index 0000000000000..ae3d91361e228
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
@@ -0,0 +1,14 @@
+; RUN: llc -O0 --frame-pointer=none -mtriple=arm-- -o - %S/../Inputs/stack-guard-reassign.ll | FileCheck %s
+
+; Verify that the offset assigned to the stack protector is at the top of the
+; frame, covering the locals.
+; CHECK-LABEL: fn:
+; CHECK:      sub sp, sp, #40
+; CHECK-NEXT: sub sp, sp, #65536
+; CHECK-NEXT: add r1, sp, #28
+; CHECK-NEXT: ldr r2, .LCPI0_0
+; CHECK-NEXT: ldr r3, [r2]
+; CHECK-NEXT: str r3, [r1]
+; CHECK-NEXT: str r0, [sp, #32]
+; CHECK: .LCPI0_0:
+; CHECK-NEXT: .long __stack_chk_guard
diff --git a/llvm/test/CodeGen/Inputs/stack-guard-reassign.ll b/llvm/test/CodeGen/Inputs/stack-guard-reassign.ll
new file mode 100644
index 0000000000000..5829b2562e7bb
--- /dev/null
+++ b/llvm/test/CodeGen/Inputs/stack-guard-reassign.ll
@@ -0,0 +1,21 @@
+define i32 @fn(i8* %str) #0 {
+entry:
+  %str.addr = alloca i8*, align 4
+  %buffer = alloca [65536 x i8], align 1
+  store i8* %str, i8** %str.addr, align 4
+  %arraydecay = getelementptr inbounds [65536 x i8], [65536 x i8]* %buffer, i32 0, i32 0
+  %0 = load i8*, i8** %str.addr, align 4
+  %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
+  %arraydecay1 = getelementptr inbounds [65536 x i8], [65536 x i8]* %buffer, i32 0, i32 0
+  %call2 = call i32 @puts(i8* %arraydecay1)
+  %arrayidx = getelementptr inbounds [65536 x i8], [65536 x i8]* %buffer, i32 0, i32 65535
+  %1 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %1 to i32
+  ret i32 %conv
+}
+
+declare i8* @strcpy(i8*, i8*)
+
+declare i32 @puts(i8*)
+
+attributes #0 = { noinline nounwind optnone ssp }
diff --git a/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll b/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
new file mode 100644
index 0000000000000..3a8cac506c29e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
@@ -0,0 +1,15 @@
+; RUN: llc -O0 --frame-pointer=none -mtriple=powerpc-- -o - %S/../Inputs/stack-guard-reassign.ll | FileCheck %s
+
+; Verify that the offset assigned to the stack protector is at the top of the
+; frame, covering the locals.
+; CHECK-LABEL: fn:
+; CHECK:      mflr 0
+; CHECK-NEXT: stw 0, 4(1)
+; CHECK-NEXT: lis 0, -2
+; CHECK-NEXT: ori 0, 0, 65488
+; CHECK-NEXT: stwux 1, 1, 0
+; CHECK-NEXT: subf 0, 0, 1
+; CHECK-NEXT: addi 4, 1, 36
+; CHECK-NEXT: lis 5, __stack_chk_guard@ha
+; CHECK-NEXT: lwz 6, __stack_chk_guard@l(5)
+; CHECK-NEXT: stw 6, 0(4)

From 9f2b290addfc4d9f514790b47773b141682b0db5 Mon Sep 17 00:00:00 2001
From: Francis Visoiu Mistrih <francisvm@yahoo.com>
Date: Wed, 17 Jul 2019 20:46:19 +0000
Subject: [PATCH 389/451] [PEI] Don't re-allocate a pre-allocated stack
 protector slot

The LocalStackSlotPass pre-allocates a stack protector and makes sure
that it comes before the local variables on the stack.

We need to make sure that later during PEI we don't re-allocate a new
stack protector slot. If that happens, the new stack protector slot will
end up being **after** the local variables that it should be protecting.

Therefore, we would have two slots assigned for two different stack
protectors, one at the top of the stack, and one at the bottom. Since
PEI will overwrite the assigned slot for the stack protector, the load
that is used to compare the value of the stack protector will use the
slot assigned by PEI, which is wrong.

For this, we need to check if the object is pre-allocated, and re-use
that pre-allocated slot.

Differential Revision: https://reviews.llvm.org/D64757

llvm-svn: 366371
---
 llvm/lib/CodeGen/LocalStackSlotAllocation.cpp |  8 +++++++
 llvm/lib/CodeGen/PrologEpilogInserter.cpp     | 21 +++++++++++++++++--
 .../CodeGen/AArch64/stack-guard-reassign.ll   |  2 +-
 .../CodeGen/AArch64/stack-guard-reassign.mir  |  4 ++--
 .../test/CodeGen/AArch64/stack-guard-vaarg.ll |  2 +-
 llvm/test/CodeGen/ARM/stack-guard-reassign.ll |  6 +++---
 .../CodeGen/PowerPC/stack-guard-reassign.ll   |  4 +++-
 7 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index aa8f824c6b952..b14d76a585f73 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -201,6 +201,14 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
   SmallSet<int, 16> ProtectedObjs;
   if (MFI.hasStackProtectorIndex()) {
     int StackProtectorFI = MFI.getStackProtectorIndex();
+
+    // We need to make sure we didn't pre-allocate the stack protector when
+    // doing this.
+    // If we already have a stack protector, this will re-assign it to a slot
+    // that is **not** covering the protected objects.
+    assert(!MFI.isObjectPreAllocated(StackProtectorFI) &&
+           "Stack protector pre-allocated in LocalStackSlotAllocation");
+
     StackObjSet LargeArrayObjs;
     StackObjSet SmallArrayObjs;
     StackObjSet AddrOfObjs;
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index dfbf665321ded..d463bee675958 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -933,8 +933,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
     StackObjSet SmallArrayObjs;
     StackObjSet AddrOfObjs;
 
-    AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
-                      Skew);
+    // If we need a stack protector, we need to make sure that
+    // LocalStackSlotPass didn't already allocate a slot for it.
+    // If we are told to use the LocalStackAllocationBlock, the stack protector
+    // is expected to be already pre-allocated.
+    if (!MFI.getUseLocalStackAllocationBlock())
+      AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
+                        Skew);
+    else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()))
+      llvm_unreachable(
+          "Stack protector not pre-allocated by LocalStackSlotPass.");
 
     // Assign large stack objects first.
     for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
@@ -968,6 +976,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
       llvm_unreachable("Unexpected SSPLayoutKind.");
     }
 
+    // We expect **all** the protected stack objects to be pre-allocated by
+    // LocalStackSlotPass. If it turns out that PEI still has to allocate some
+    // of them, we may end up messing up the expected order of the objects.
+    if (MFI.getUseLocalStackAllocationBlock() &&
+        !(LargeArrayObjs.empty() && SmallArrayObjs.empty() &&
+          AddrOfObjs.empty()))
+      llvm_unreachable("Found protected stack objects not pre-allocated by "
+                       "LocalStackSlotPass.");
+
     AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
                           Offset, MaxAlign, Skew);
     AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-reassign.ll b/llvm/test/CodeGen/AArch64/stack-guard-reassign.ll
index a3b00fd2cca9c..632774c970eae 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-reassign.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-reassign.ll
@@ -3,7 +3,7 @@
 ; Verify that the offset assigned to the stack protector is at the top of the
 ; frame, covering the locals.
 ; CHECK-LABEL: fn:
-; CHECK:      add x8, sp, #24
+; CHECK:      sub x8, x29, #24
 ; CHECK-NEXT: adrp x9, __stack_chk_guard
 ; CHECK-NEXT: ldr x9, [x9, :lo12:__stack_chk_guard]
 ; CHECK-NEXT: str x9, [x8]
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir b/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir
index 296290e2b0f12..c0dfcbce1d88a 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir
+++ b/llvm/test/CodeGen/AArch64/stack-guard-reassign.mir
@@ -11,13 +11,13 @@
 name:            main
 tracksRegLiveness: true
 frameInfo:
-# CHECK: stackSize: 560
+# CHECK: stackSize: 544
   stackProtector:  '%stack.0.StackGuardSlot'
 stack:
   - { id: 0, name: StackGuardSlot, size: 8, alignment: 8, stack-id: default }
 # Verify that the offset assigned to the stack protector is at the top of the
 # frame, covering the locals.
-# CHECK:       - { id: 0, name: StackGuardSlot, type: default, offset: -552, size: 8,
+# CHECK:       - { id: 0, name: StackGuardSlot, type: default, offset: -24, size: 8,
 # CHECK-NEXT:      alignment: 8, stack-id: default, callee-saved-register: '', callee-saved-restored: true,
 # CHECK-NEXT:      local-offset: -8, debug-info-variable: '', debug-info-expression: '',
 # CHECK-NEXT:      debug-info-location: '' }
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-vaarg.ll b/llvm/test/CodeGen/AArch64/stack-guard-vaarg.ll
index e083aa10e3334..29e66d957877d 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-vaarg.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-vaarg.ll
@@ -9,7 +9,7 @@
 ; CHECK: ldr [[GUARD:x[0-9]+]]{{.*}}:lo12:__stack_chk_guard]
 ; Make sure the canary is placed relative to the frame pointer, not
 ; the stack pointer.
-; CHECK: str [[GUARD]], [sp, #8]
+; CHECK: stur [[GUARD]], [x29, #-24]
 define void @test(i8* %i, ...) #0 {
 entry:
   %buf = alloca [10 x i8], align 1
diff --git a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
index ae3d91361e228..2ce1d1588a44f 100644
--- a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
+++ b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
@@ -3,12 +3,12 @@
 ; Verify that the offset assigned to the stack protector is at the top of the
 ; frame, covering the locals.
 ; CHECK-LABEL: fn:
-; CHECK:      sub sp, sp, #40
+; CHECK:      sub sp, sp, #32
 ; CHECK-NEXT: sub sp, sp, #65536
-; CHECK-NEXT: add r1, sp, #28
+; CHECK-NEXT: add lr, sp, #65536
+; CHECK-NEXT: add r1, lr, #28
 ; CHECK-NEXT: ldr r2, .LCPI0_0
 ; CHECK-NEXT: ldr r3, [r2]
 ; CHECK-NEXT: str r3, [r1]
-; CHECK-NEXT: str r0, [sp, #32]
 ; CHECK: .LCPI0_0:
 ; CHECK-NEXT: .long __stack_chk_guard
diff --git a/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll b/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
index 3a8cac506c29e..8128b63d59824 100644
--- a/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
@@ -9,7 +9,9 @@
 ; CHECK-NEXT: ori 0, 0, 65488
 ; CHECK-NEXT: stwux 1, 1, 0
 ; CHECK-NEXT: subf 0, 0, 1
-; CHECK-NEXT: addi 4, 1, 36
+; CHECK-NEXT: lis 4, 1
+; CHECK-NEXT: ori 4, 4, 44
+; CHECK-NEXT: add 4, 1, 4
 ; CHECK-NEXT: lis 5, __stack_chk_guard@ha
 ; CHECK-NEXT: lwz 6, __stack_chk_guard@l(5)
 ; CHECK-NEXT: stw 6, 0(4)

From 6e4076699c342ce3bde6ed6373849301b6f2e447 Mon Sep 17 00:00:00 2001
From: Nilanjana Basu <nilanjana.basu87@gmail.com>
Date: Wed, 17 Jul 2019 21:01:12 +0000
Subject: [PATCH 390/451] Adding inline comments to code view type record
 directives for better readability

llvm-svn: 366372
---
 .../DebugInfo/CodeView/CodeViewRecordIO.h     |  49 ++--
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp |  17 +-
 .../DebugInfo/CodeView/CodeViewRecordIO.cpp   |  54 ++--
 .../DebugInfo/CodeView/TypeRecordMapping.cpp  | 228 +++++++++--------
 llvm/test/DebugInfo/COFF/types-basic.ll       | 242 +++++++++---------
 5 files changed, 325 insertions(+), 265 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
index d3bad4cd405ca..00fb0cf4cc902 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -32,6 +32,7 @@ class CodeViewRecordStreamer {
   virtual void EmitBytes(StringRef Data) = 0;
   virtual void EmitIntValue(uint64_t Value, unsigned Size) = 0;
   virtual void EmitBinaryData(StringRef Data) = 0;
+  virtual void AddComment(const Twine &T) = 0;
   virtual ~CodeViewRecordStreamer() = default;
 };
 
@@ -59,7 +60,7 @@ class CodeViewRecordIO {
   Error beginRecord(Optional<uint32_t> MaxLength);
   Error endRecord();
 
-  Error mapInteger(TypeIndex &TypeInd);
+  Error mapInteger(TypeIndex &TypeInd, const Twine &Comment = "");
 
   bool isStreaming() const {
     return (Streamer != nullptr) && (Reader == nullptr) && (Writer == nullptr);
@@ -92,8 +93,9 @@ class CodeViewRecordIO {
     return Error::success();
   }
 
-  template <typename T> Error mapInteger(T &Value) {
+  template <typename T> Error mapInteger(T &Value, const Twine &Comment = "") {
     if (isStreaming()) {
+      emitComment(Comment);
       Streamer->EmitIntValue((int)Value, sizeof(T));
       incrStreamedLen(sizeof(T));
       return Error::success();
@@ -105,7 +107,7 @@ class CodeViewRecordIO {
     return Reader->readInteger(Value);
   }
 
-  template <typename T> Error mapEnum(T &Value) {
+  template <typename T> Error mapEnum(T &Value, const Twine &Comment = "") {
     if (!isStreaming() && sizeof(Value) > maxFieldLength())
       return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
 
@@ -115,7 +117,7 @@ class CodeViewRecordIO {
     if (isWriting() || isStreaming())
       X = static_cast<U>(Value);
 
-    if (auto EC = mapInteger(X))
+    if (auto EC = mapInteger(X, Comment))
       return EC;
 
     if (isReading())
@@ -124,19 +126,22 @@ class CodeViewRecordIO {
     return Error::success();
   }
 
-  Error mapEncodedInteger(int64_t &Value);
-  Error mapEncodedInteger(uint64_t &Value);
-  Error mapEncodedInteger(APSInt &Value);
-  Error mapStringZ(StringRef &Value);
-  Error mapGuid(GUID &Guid);
+  Error mapEncodedInteger(int64_t &Value, const Twine &Comment = "");
+  Error mapEncodedInteger(uint64_t &Value, const Twine &Comment = "");
+  Error mapEncodedInteger(APSInt &Value, const Twine &Comment = "");
+  Error mapStringZ(StringRef &Value, const Twine &Comment = "");
+  Error mapGuid(GUID &Guid, const Twine &Comment = "");
 
-  Error mapStringZVectorZ(std::vector<StringRef> &Value);
+  Error mapStringZVectorZ(std::vector<StringRef> &Value,
+                          const Twine &Comment = "");
 
   template <typename SizeType, typename T, typename ElementMapper>
-  Error mapVectorN(T &Items, const ElementMapper &Mapper) {
+  Error mapVectorN(T &Items, const ElementMapper &Mapper,
+                   const Twine &Comment = "") {
     SizeType Size;
     if (isStreaming()) {
       Size = static_cast<SizeType>(Items.size());
+      emitComment(Comment);
       Streamer->EmitIntValue(Size, sizeof(Size));
       incrStreamedLen(sizeof(Size)); // add 1 for the delimiter
 
@@ -168,7 +173,9 @@ class CodeViewRecordIO {
   }
 
   template <typename T, typename ElementMapper>
-  Error mapVectorTail(T &Items, const ElementMapper &Mapper) {
+  Error mapVectorTail(T &Items, const ElementMapper &Mapper,
+                      const Twine &Comment = "") {
+    emitComment(Comment);
     if (isStreaming() || isWriting()) {
       for (auto &Item : Items) {
         if (auto EC = Mapper(*this, Item))
@@ -186,8 +193,9 @@ class CodeViewRecordIO {
     return Error::success();
   }
 
-  Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes);
-  Error mapByteVectorTail(std::vector<uint8_t> &Bytes);
+  Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes, const Twine &Comment = "");
+  Error mapByteVectorTail(std::vector<uint8_t> &Bytes,
+                          const Twine &Comment = "");
 
   Error padToAlignment(uint32_t Align);
   Error skipPadding();
@@ -199,8 +207,10 @@ class CodeViewRecordIO {
   }
 
 private:
-  void emitEncodedSignedInteger(const int64_t &Value);
-  void emitEncodedUnsignedInteger(const uint64_t &Value);
+  void emitEncodedSignedInteger(const int64_t &Value,
+                                const Twine &Comment = "");
+  void emitEncodedUnsignedInteger(const uint64_t &Value,
+                                  const Twine &Comment = "");
   Error writeEncodedSignedInteger(const int64_t &Value);
   Error writeEncodedUnsignedInteger(const uint64_t &Value);
 
@@ -214,6 +224,13 @@ class CodeViewRecordIO {
       StreamedLen = 4; // The record prefix is 4 bytes long
   }
 
+  void emitComment(const Twine &Comment) {
+    if (isStreaming()) {
+      Twine TComment(Comment);
+      Streamer->AddComment(TComment);
+    }
+  }
+
   struct RecordLimit {
     uint32_t BeginOffset;
     Optional<uint32_t> MaxLength;
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 40f874bbea2df..bd0ace9e1bb09 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -108,6 +108,8 @@ class CVMCAdapter : public CodeViewRecordStreamer {
 
   void EmitBinaryData(StringRef Data) { OS->EmitBinaryData(Data); }
 
+  void AddComment(const Twine &T) { OS->AddComment(T); }
+
 private:
   MCStreamer *OS = nullptr;
 };
@@ -615,6 +617,13 @@ emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
   OS.EmitBytes(NullTerminatedString);
 }
 
+static StringRef getTypeLeafName(TypeLeafKind TypeKind) {
+  for (const EnumEntry<TypeLeafKind> &EE : getTypeLeafNames())
+    if (EE.Value == TypeKind)
+      return EE.Name;
+  return "";
+}
+
 void CodeViewDebug::emitTypeInformation() {
   if (TypeTable.empty())
     return;
@@ -659,8 +668,12 @@ void CodeViewDebug::emitTypeInformation() {
 
     auto RecordLen = Record.length();
     auto RecordKind = Record.kind();
-    OS.EmitIntValue(RecordLen - 2, 2);
-    OS.EmitIntValue(RecordKind, sizeof(RecordKind));
+    if (OS.isVerboseAsm())
+      CVMCOS.AddComment("Record length");
+    CVMCOS.EmitIntValue(RecordLen - 2, 2);
+    if (OS.isVerboseAsm())
+      CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind));
+    CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind));
 
     Error E = codeview::visitTypeRecord(Record, *B, Pipeline);
 
diff --git a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 2b69a1e0768fa..2f49474115a19 100644
--- a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -97,8 +97,10 @@ Error CodeViewRecordIO::skipPadding() {
   return Reader->skip(BytesToAdvance);
 }
 
-Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes) {
+Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes,
+                                          const Twine &Comment) {
   if (isStreaming()) {
+    emitComment(Comment);
     Streamer->EmitBinaryData(toStringRef(Bytes));
     incrStreamedLen(Bytes.size());
   } else if (isWriting()) {
@@ -111,9 +113,10 @@ Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes) {
+Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes,
+                                          const Twine &Comment) {
   ArrayRef<uint8_t> BytesRef(Bytes);
-  if (auto EC = mapByteVectorTail(BytesRef))
+  if (auto EC = mapByteVectorTail(BytesRef, Comment))
     return EC;
   if (!isWriting())
     Bytes.assign(BytesRef.begin(), BytesRef.end());
@@ -121,8 +124,9 @@ Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd) {
+Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd, const Twine &Comment) {
   if (isStreaming()) {
+    emitComment(Comment);
     Streamer->EmitIntValue(TypeInd.getIndex(), sizeof(TypeInd.getIndex()));
     incrStreamedLen(sizeof(TypeInd.getIndex()));
   } else if (isWriting()) {
@@ -137,12 +141,13 @@ Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value) {
+Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value,
+                                          const Twine &Comment) {
   if (isStreaming()) {
     if (Value >= 0)
-      emitEncodedUnsignedInteger(static_cast<uint64_t>(Value));
+      emitEncodedUnsignedInteger(static_cast<uint64_t>(Value), Comment);
     else
-      emitEncodedSignedInteger(Value);
+      emitEncodedSignedInteger(Value, Comment);
   } else if (isWriting()) {
     if (Value >= 0) {
       if (auto EC = writeEncodedUnsignedInteger(static_cast<uint64_t>(Value)))
@@ -161,9 +166,10 @@ Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value) {
+Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value,
+                                          const Twine &Comment) {
   if (isStreaming())
-    emitEncodedUnsignedInteger(Value);
+    emitEncodedUnsignedInteger(Value, Comment);
   else if (isWriting()) {
     if (auto EC = writeEncodedUnsignedInteger(Value))
       return EC;
@@ -176,12 +182,12 @@ Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapEncodedInteger(APSInt &Value) {
+Error CodeViewRecordIO::mapEncodedInteger(APSInt &Value, const Twine &Comment) {
   if (isStreaming()) {
     if (Value.isSigned())
-      emitEncodedSignedInteger(Value.getSExtValue());
+      emitEncodedSignedInteger(Value.getSExtValue(), Comment);
     else
-      emitEncodedUnsignedInteger(Value.getZExtValue());
+      emitEncodedUnsignedInteger(Value.getZExtValue(), Comment);
   } else if (isWriting()) {
     if (Value.isSigned())
       return writeEncodedSignedInteger(Value.getSExtValue());
@@ -191,9 +197,10 @@ Error CodeViewRecordIO::mapEncodedInteger(APSInt &Value) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
+Error CodeViewRecordIO::mapStringZ(StringRef &Value, const Twine &Comment) {
   if (isStreaming()) {
     auto NullTerminatedString = StringRef(Value.data(), Value.size() + 1);
+    emitComment(Comment);
     Streamer->EmitBytes(NullTerminatedString);
     incrStreamedLen(NullTerminatedString.size());
   } else if (isWriting()) {
@@ -208,12 +215,13 @@ Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapGuid(GUID &Guid) {
+Error CodeViewRecordIO::mapGuid(GUID &Guid, const Twine &Comment) {
   constexpr uint32_t GuidSize = 16;
 
   if (isStreaming()) {
     StringRef GuidSR =
         StringRef((reinterpret_cast<const char *>(&Guid)), GuidSize);
+    emitComment(Comment);
     Streamer->EmitBytes(GuidSR);
     incrStreamedLen(GuidSize);
     return Error::success();
@@ -234,9 +242,11 @@ Error CodeViewRecordIO::mapGuid(GUID &Guid) {
   return Error::success();
 }
 
-Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
+Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value,
+                                          const Twine &Comment) {
 
   if (!isReading()) {
+    emitComment(Comment);
     for (auto V : Value) {
       if (auto EC = mapStringZ(V))
         return EC;
@@ -257,41 +267,51 @@ Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
   return Error::success();
 }
 
-void CodeViewRecordIO::emitEncodedSignedInteger(const int64_t &Value) {
+void CodeViewRecordIO::emitEncodedSignedInteger(const int64_t &Value,
+                                                const Twine &Comment) {
   assert(Value < 0 && "Encoded integer is not signed!");
   if (Value >= std::numeric_limits<int8_t>::min()) {
     Streamer->EmitIntValue(LF_CHAR, 2);
+    emitComment(Comment);
     Streamer->EmitIntValue(Value, 1);
     incrStreamedLen(3);
   } else if (Value >= std::numeric_limits<int16_t>::min()) {
     Streamer->EmitIntValue(LF_SHORT, 2);
+    emitComment(Comment);
     Streamer->EmitIntValue(Value, 2);
     incrStreamedLen(4);
   } else if (Value >= std::numeric_limits<int32_t>::min()) {
     Streamer->EmitIntValue(LF_LONG, 2);
+    emitComment(Comment);
     Streamer->EmitIntValue(Value, 4);
     incrStreamedLen(6);
   } else {
     Streamer->EmitIntValue(LF_QUADWORD, 2);
+    emitComment(Comment);
     Streamer->EmitIntValue(Value, 4);
     incrStreamedLen(6);
   }
 }
 
-void CodeViewRecordIO::emitEncodedUnsignedInteger(const uint64_t &Value) {
+void CodeViewRecordIO::emitEncodedUnsignedInteger(const uint64_t &Value,
+                                                  const Twine &Comment) {
   if (Value < LF_NUMERIC) {
+    emitComment(Comment);
     Streamer->EmitIntValue(Value, 2);
     incrStreamedLen(2);
   } else if (Value <= std::numeric_limits<uint16_t>::max()) {
     Streamer->EmitIntValue(LF_USHORT, 2);
+    emitComment(Comment);
     Streamer->EmitIntValue(Value, 2);
     incrStreamedLen(4);
   } else if (Value <= std::numeric_limits<uint32_t>::max()) {
     Streamer->EmitIntValue(LF_ULONG, 2);
+    emitComment(Comment);
     Streamer->EmitIntValue(Value, 4);
     incrStreamedLen(6);
   } else {
     Streamer->EmitIntValue(LF_UQUADWORD, 2);
+    emitComment(Comment);
     Streamer->EmitIntValue(Value, 8);
     incrStreamedLen(6);
   }
diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index 104e310d41b9f..8e8eba4d53e7f 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -21,19 +21,19 @@ struct MapOneMethodRecord {
       : IsFromOverloadList(IsFromOverloadList) {}
 
   Error operator()(CodeViewRecordIO &IO, OneMethodRecord &Method) const {
-    error(IO.mapInteger(Method.Attrs.Attrs));
+    error(IO.mapInteger(Method.Attrs.Attrs, "AccessSpecifier"));
     if (IsFromOverloadList) {
       uint16_t Padding = 0;
-      error(IO.mapInteger(Padding));
+      error(IO.mapInteger(Padding, "Padding"));
     }
-    error(IO.mapInteger(Method.Type));
+    error(IO.mapInteger(Method.Type, "Type"));
     if (Method.isIntroducingVirtual()) {
-      error(IO.mapInteger(Method.VFTableOffset));
+      error(IO.mapInteger(Method.VFTableOffset, "VFTableOffset"));
     } else if (IO.isReading())
       Method.VFTableOffset = -1;
 
     if (!IsFromOverloadList)
-      error(IO.mapStringZ(Method.Name));
+      error(IO.mapStringZ(Method.Name, "Name"));
 
     return Error::success();
   }
@@ -75,9 +75,9 @@ static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name,
     // Reading & Streaming mode come after writing mode is executed for each
     // record. Truncating large names are done during writing, so its not
     // necessary to do it while reading or streaming.
-    error(IO.mapStringZ(Name));
+    error(IO.mapStringZ(Name, "Name"));
     if (HasUniqueName)
-      error(IO.mapStringZ(UniqueName));
+      error(IO.mapStringZ(UniqueName, "LinkageName"));
   }
 
   return Error::success();
@@ -144,32 +144,32 @@ Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) {
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ModifierRecord &Record) {
-  error(IO.mapInteger(Record.ModifiedType));
-  error(IO.mapEnum(Record.Modifiers));
+  error(IO.mapInteger(Record.ModifiedType, "ModifiedType"));
+  error(IO.mapEnum(Record.Modifiers, "Modifiers"));
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           ProcedureRecord &Record) {
-  error(IO.mapInteger(Record.ReturnType));
-  error(IO.mapEnum(Record.CallConv));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.ParameterCount));
-  error(IO.mapInteger(Record.ArgumentList));
+  error(IO.mapInteger(Record.ReturnType, "ReturnType"));
+  error(IO.mapEnum(Record.CallConv, "CallingConvention"));
+  error(IO.mapEnum(Record.Options, "FunctionOptions"));
+  error(IO.mapInteger(Record.ParameterCount, "NumParameters"));
+  error(IO.mapInteger(Record.ArgumentList, "ArgListType"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           MemberFunctionRecord &Record) {
-  error(IO.mapInteger(Record.ReturnType));
-  error(IO.mapInteger(Record.ClassType));
-  error(IO.mapInteger(Record.ThisType));
-  error(IO.mapEnum(Record.CallConv));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.ParameterCount));
-  error(IO.mapInteger(Record.ArgumentList));
-  error(IO.mapInteger(Record.ThisPointerAdjustment));
+  error(IO.mapInteger(Record.ReturnType, "ReturnType"));
+  error(IO.mapInteger(Record.ClassType, "ClassType"));
+  error(IO.mapInteger(Record.ThisType, "ThisType"));
+  error(IO.mapEnum(Record.CallConv, "CallingConvention"));
+  error(IO.mapEnum(Record.Options, "FunctionOptions"));
+  error(IO.mapInteger(Record.ParameterCount, "NumParameters"));
+  error(IO.mapInteger(Record.ArgumentList, "ArgListType"));
+  error(IO.mapInteger(Record.ThisPointerAdjustment, "ThisAdjustment"));
 
   return Error::success();
 }
@@ -177,7 +177,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArgListRecord &Record) {
   error(IO.mapVectorN<uint32_t>(
       Record.ArgIndices,
-      [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+      [](CodeViewRecordIO &IO, TypeIndex &N) {
+        return IO.mapInteger(N, "Argument");
+      },
+      "NumArgs"));
   return Error::success();
 }
 
@@ -185,32 +188,35 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           StringListRecord &Record) {
   error(IO.mapVectorN<uint32_t>(
       Record.StringIndices,
-      [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+      [](CodeViewRecordIO &IO, TypeIndex &N) {
+        return IO.mapInteger(N, "Strings");
+      },
+      "NumStrings"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) {
-  error(IO.mapInteger(Record.ReferentType));
-  error(IO.mapInteger(Record.Attrs));
+  error(IO.mapInteger(Record.ReferentType, "PointeeType"));
+  error(IO.mapInteger(Record.Attrs, "Attributes"));
 
   if (Record.isPointerToMember()) {
     if (IO.isReading())
       Record.MemberInfo.emplace();
 
     MemberPointerInfo &M = *Record.MemberInfo;
-    error(IO.mapInteger(M.ContainingType));
-    error(IO.mapEnum(M.Representation));
+    error(IO.mapInteger(M.ContainingType, "ClassType"));
+    error(IO.mapEnum(M.Representation, "Representation"));
   }
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArrayRecord &Record) {
-  error(IO.mapInteger(Record.ElementType));
-  error(IO.mapInteger(Record.IndexType));
-  error(IO.mapEncodedInteger(Record.Size));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.ElementType, "ElementType"));
+  error(IO.mapInteger(Record.IndexType, "IndexType"));
+  error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -220,12 +226,12 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
          (CVR.kind() == TypeLeafKind::LF_CLASS) ||
          (CVR.kind() == TypeLeafKind::LF_INTERFACE));
 
-  error(IO.mapInteger(Record.MemberCount));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.FieldList));
-  error(IO.mapInteger(Record.DerivationList));
-  error(IO.mapInteger(Record.VTableShape));
-  error(IO.mapEncodedInteger(Record.Size));
+  error(IO.mapInteger(Record.MemberCount, "MemberCount"));
+  error(IO.mapEnum(Record.Options, "Properties"));
+  error(IO.mapInteger(Record.FieldList, "FieldList"));
+  error(IO.mapInteger(Record.DerivationList, "DerivedFrom"));
+  error(IO.mapInteger(Record.VTableShape, "VShape"));
+  error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
   error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
                              Record.hasUniqueName()));
 
@@ -233,10 +239,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
-  error(IO.mapInteger(Record.MemberCount));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.FieldList));
-  error(IO.mapEncodedInteger(Record.Size));
+  error(IO.mapInteger(Record.MemberCount, "MemberCount"));
+  error(IO.mapEnum(Record.Options, "Properties"));
+  error(IO.mapInteger(Record.FieldList, "FieldList"));
+  error(IO.mapEncodedInteger(Record.Size, "SizeOf"));
   error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
                              Record.hasUniqueName()));
 
@@ -244,10 +250,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) {
-  error(IO.mapInteger(Record.MemberCount));
-  error(IO.mapEnum(Record.Options));
-  error(IO.mapInteger(Record.UnderlyingType));
-  error(IO.mapInteger(Record.FieldList));
+  error(IO.mapInteger(Record.MemberCount, "NumEnumerators"));
+  error(IO.mapEnum(Record.Options, "Properties"));
+  error(IO.mapInteger(Record.UnderlyingType, "UnderlyingType"));
+  error(IO.mapInteger(Record.FieldList, "FieldListType"));
   error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
                              Record.hasUniqueName()));
 
@@ -255,9 +261,9 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) {
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, BitFieldRecord &Record) {
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapInteger(Record.BitSize));
-  error(IO.mapInteger(Record.BitOffset));
+  error(IO.mapInteger(Record.Type, "Type"));
+  error(IO.mapInteger(Record.BitSize, "BitSize"));
+  error(IO.mapInteger(Record.BitOffset, "BitOffset"));
 
   return Error::success();
 }
@@ -268,7 +274,7 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
   if (!IO.isReading()) {
     ArrayRef<VFTableSlotKind> Slots = Record.getSlots();
     Size = Slots.size();
-    error(IO.mapInteger(Size));
+    error(IO.mapInteger(Size, "VFEntryCount"));
 
     for (size_t SlotIndex = 0; SlotIndex < Slots.size(); SlotIndex += 2) {
       uint8_t Byte = static_cast<uint8_t>(Slots[SlotIndex]) << 4;
@@ -292,61 +298,64 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, VFTableRecord &Record) {
-  error(IO.mapInteger(Record.CompleteClass));
-  error(IO.mapInteger(Record.OverriddenVFTable));
-  error(IO.mapInteger(Record.VFPtrOffset));
+  error(IO.mapInteger(Record.CompleteClass, "CompleteClass"));
+  error(IO.mapInteger(Record.OverriddenVFTable, "OverriddenVFTable"));
+  error(IO.mapInteger(Record.VFPtrOffset, "VFPtrOffset"));
   uint32_t NamesLen = 0;
   if (!IO.isReading()) {
     for (auto Name : Record.MethodNames)
       NamesLen += Name.size() + 1;
   }
-  error(IO.mapInteger(NamesLen));
+  error(IO.mapInteger(NamesLen, ""));
   error(IO.mapVectorTail(
       Record.MethodNames,
-      [](CodeViewRecordIO &IO, StringRef &S) { return IO.mapStringZ(S); }));
+      [](CodeViewRecordIO &IO, StringRef &S) {
+        return IO.mapStringZ(S, "MethodName");
+      },
+      "VFTableName"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, StringIdRecord &Record) {
-  error(IO.mapInteger(Record.Id));
-  error(IO.mapStringZ(Record.String));
+  error(IO.mapInteger(Record.Id, "Id"));
+  error(IO.mapStringZ(Record.String, "StringData"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           UdtSourceLineRecord &Record) {
-  error(IO.mapInteger(Record.UDT));
-  error(IO.mapInteger(Record.SourceFile));
-  error(IO.mapInteger(Record.LineNumber));
+  error(IO.mapInteger(Record.UDT, "UDT"));
+  error(IO.mapInteger(Record.SourceFile, "SourceFile"));
+  error(IO.mapInteger(Record.LineNumber, "LineNumber"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           UdtModSourceLineRecord &Record) {
-  error(IO.mapInteger(Record.UDT));
-  error(IO.mapInteger(Record.SourceFile));
-  error(IO.mapInteger(Record.LineNumber));
-  error(IO.mapInteger(Record.Module));
+  error(IO.mapInteger(Record.UDT, "UDT"));
+  error(IO.mapInteger(Record.SourceFile, "SourceFile"));
+  error(IO.mapInteger(Record.LineNumber, "LineNumber"));
+  error(IO.mapInteger(Record.Module, "Module"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, FuncIdRecord &Record) {
-  error(IO.mapInteger(Record.ParentScope));
-  error(IO.mapInteger(Record.FunctionType));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.ParentScope, "ParentScope"));
+  error(IO.mapInteger(Record.FunctionType, "FunctionType"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           MemberFuncIdRecord &Record) {
-  error(IO.mapInteger(Record.ClassType));
-  error(IO.mapInteger(Record.FunctionType));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.ClassType, "ClassType"));
+  error(IO.mapInteger(Record.FunctionType, "FunctionType"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -355,7 +364,10 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           BuildInfoRecord &Record) {
   error(IO.mapVectorN<uint16_t>(
       Record.ArgIndices,
-      [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+      [](CodeViewRecordIO &IO, TypeIndex &N) {
+        return IO.mapInteger(N, "Argument");
+      },
+      "NumArgs"));
 
   return Error::success();
 }
@@ -364,7 +376,7 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           MethodOverloadListRecord &Record) {
   // TODO: Split the list into multiple records if it's longer than 64KB, using
   // a subrecord of TypeRecordKind::Index to chain the records together.
-  error(IO.mapVectorTail(Record.Methods, MapOneMethodRecord(true)));
+  error(IO.mapVectorTail(Record.Methods, MapOneMethodRecord(true), "Method"));
 
   return Error::success();
 }
@@ -378,22 +390,22 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           TypeServer2Record &Record) {
-  error(IO.mapGuid(Record.Guid));
-  error(IO.mapInteger(Record.Age));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapGuid(Record.Guid, "Guid"));
+  error(IO.mapInteger(Record.Age, "Age"));
+  error(IO.mapStringZ(Record.Name, "Name"));
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR, LabelRecord &Record) {
-  error(IO.mapEnum(Record.Mode));
+  error(IO.mapEnum(Record.Mode, "Mode"));
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           BaseClassRecord &Record) {
-  error(IO.mapInteger(Record.Attrs.Attrs));
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapEncodedInteger(Record.Offset));
+  error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+  error(IO.mapInteger(Record.Type, "BaseType"));
+  error(IO.mapEncodedInteger(Record.Offset, "BaseOffset"));
 
   return Error::success();
 }
@@ -403,27 +415,27 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
   error(IO.mapInteger(Record.Attrs.Attrs));
 
   // FIXME: Handle full APInt such as __int128.
-  error(IO.mapEncodedInteger(Record.Value));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapEncodedInteger(Record.Value, "EnumValue"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           DataMemberRecord &Record) {
-  error(IO.mapInteger(Record.Attrs.Attrs));
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapEncodedInteger(Record.FieldOffset));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+  error(IO.mapInteger(Record.Type, "Type"));
+  error(IO.mapEncodedInteger(Record.FieldOffset, "FieldOffset"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           OverloadedMethodRecord &Record) {
-  error(IO.mapInteger(Record.NumOverloads));
-  error(IO.mapInteger(Record.MethodList));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.NumOverloads, "MethodCount"));
+  error(IO.mapInteger(Record.MethodList, "MethodListIndex"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -438,9 +450,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           NestedTypeRecord &Record) {
   uint16_t Padding = 0;
-  error(IO.mapInteger(Padding));
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Padding, "Padding"));
+  error(IO.mapInteger(Record.Type, "Type"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -448,9 +460,9 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           StaticDataMemberRecord &Record) {
 
-  error(IO.mapInteger(Record.Attrs.Attrs));
-  error(IO.mapInteger(Record.Type));
-  error(IO.mapStringZ(Record.Name));
+  error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+  error(IO.mapInteger(Record.Type, "Type"));
+  error(IO.mapStringZ(Record.Name, "Name"));
 
   return Error::success();
 }
@@ -458,11 +470,11 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           VirtualBaseClassRecord &Record) {
 
-  error(IO.mapInteger(Record.Attrs.Attrs));
-  error(IO.mapInteger(Record.BaseType));
-  error(IO.mapInteger(Record.VBPtrType));
-  error(IO.mapEncodedInteger(Record.VBPtrOffset));
-  error(IO.mapEncodedInteger(Record.VTableIndex));
+  error(IO.mapInteger(Record.Attrs.Attrs, "AccessSpecifier"));
+  error(IO.mapInteger(Record.BaseType, "BaseType"));
+  error(IO.mapInteger(Record.VBPtrType, "VBPtrType"));
+  error(IO.mapEncodedInteger(Record.VBPtrOffset, "VBPtrOffset"));
+  error(IO.mapEncodedInteger(Record.VTableIndex, "VBTableIndex"));
 
   return Error::success();
 }
@@ -470,8 +482,8 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           VFPtrRecord &Record) {
   uint16_t Padding = 0;
-  error(IO.mapInteger(Padding));
-  error(IO.mapInteger(Record.Type));
+  error(IO.mapInteger(Padding, "Padding"));
+  error(IO.mapInteger(Record.Type, "Type"));
 
   return Error::success();
 }
@@ -479,23 +491,23 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
 Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
                                           ListContinuationRecord &Record) {
   uint16_t Padding = 0;
-  error(IO.mapInteger(Padding));
-  error(IO.mapInteger(Record.ContinuationIndex));
+  error(IO.mapInteger(Padding, "Padding"));
+  error(IO.mapInteger(Record.ContinuationIndex, "ContinuationIndex"));
 
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           PrecompRecord &Precomp) {
-  error(IO.mapInteger(Precomp.StartTypeIndex));
-  error(IO.mapInteger(Precomp.TypesCount));
-  error(IO.mapInteger(Precomp.Signature));
-  error(IO.mapStringZ(Precomp.PrecompFilePath));
+  error(IO.mapInteger(Precomp.StartTypeIndex, "StartIndex"));
+  error(IO.mapInteger(Precomp.TypesCount, "Count"));
+  error(IO.mapInteger(Precomp.Signature, "Signature"));
+  error(IO.mapStringZ(Precomp.PrecompFilePath, "PrecompFile"));
   return Error::success();
 }
 
 Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
                                           EndPrecompRecord &EndPrecomp) {
-  error(IO.mapInteger(EndPrecomp.Signature));
+  error(IO.mapInteger(EndPrecomp.Signature, "Signature"));
   return Error::success();
 }
diff --git a/llvm/test/DebugInfo/COFF/types-basic.ll b/llvm/test/DebugInfo/COFF/types-basic.ll
index 343615a2e652a..a6131988b1c7d 100644
--- a/llvm/test/DebugInfo/COFF/types-basic.ll
+++ b/llvm/test/DebugInfo/COFF/types-basic.ll
@@ -347,17 +347,15 @@
 ; CHECK:   ]
 ; CHECK: ]
 
-
-
 ; ASM: .section	.debug$T,"dr"
 ; ASM: .p2align	2
 ; ASM: .long	4                       # Debug section magic
-; ASM: .short	18
-; ASM: .short	4609
-; ASM: .long	3
-; ASM: .long	64
-; ASM: .long	65
-; ASM: .long	19
+; ASM: .short	18                      # Record length
+; ASM: .short	4609                    # Record kind: LF_ARGLIST
+; ASM: .long	3                       # NumArgs
+; ASM: .long	64                      # Argument
+; ASM: .long	65                      # Argument
+; ASM: .long	19                      # Argument
 ; ASM: # ArgList (0x1000) {
 ; ASM: #   TypeLeafKind: LF_ARGLIST (0x1201)
 ; ASM: #   NumArgs: 3
@@ -367,13 +365,13 @@
 ; ASM: #     ArgType: __int64 (0x13)
 ; ASM: #   ]
 ; ASM: # }
-; ASM: .short	14
-; ASM: .short	4104
-; ASM: .long	3
-; ASM: .byte	0
-; ASM: .byte	0
-; ASM: .short	3
-; ASM: .long	4096
+; ASM: .short	14                      # Record length
+; ASM: .short	4104                    # Record kind: LF_PROCEDURE
+; ASM: .long	3                       # ReturnType
+; ASM: .byte	0                       # CallingConvention
+; ASM: .byte	0                       # FunctionOptions
+; ASM: .short	3                       # NumParameters
+; ASM: .long	4096                    # ArgListType
 ; ASM: # Procedure (0x1001) {
 ; ASM: #   TypeLeafKind: LF_PROCEDURE (0x1008)
 ; ASM: #   ReturnType: void (0x3)
@@ -383,11 +381,11 @@
 ; ASM: #   NumParameters: 3
 ; ASM: #   ArgListType: (float, double, __int64) (0x1000)
 ; ASM: # }
-; ASM: .short	14
-; ASM: .short	5633
-; ASM: .long	0
-; ASM: .long	4097
-; ASM: .asciz	"f"
+; ASM: .short	14                      # Record length
+; ASM: .short	5633                    # Record kind: LF_FUNC_ID
+; ASM: .long	0                       # ParentScope
+; ASM: .long	4097                    # FunctionType
+; ASM: .asciz	"f"                     # Name
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # FuncId (0x1002) {
@@ -396,10 +394,10 @@
 ; ASM: #   FunctionType: void (float, double, __int64) (0x1001)
 ; ASM: #   Name: f
 ; ASM: # }
-; ASM: .short	10
-; ASM: .short	4097
-; ASM: .long	116
-; ASM: .short	1
+; ASM: .short	10                      # Record length
+; ASM: .short	4097                    # Record kind: LF_MODIFIER
+; ASM: .long	116                     # ModifiedType
+; ASM: .short	1                       # Modifiers
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # Modifier (0x1003) {
@@ -409,10 +407,10 @@
 ; ASM: #     Const (0x1)
 ; ASM: #   ]
 ; ASM: # }
-; ASM: .short	10
-; ASM: .short	4098
-; ASM: .long	4099
-; ASM: .long	65548
+; ASM: .short	10                      # Record length
+; ASM: .short	4098                    # Record kind: LF_POINTER
+; ASM: .long	4099                    # PointeeType
+; ASM: .long	65548                   # Attributes
 ; ASM: # Pointer (0x1004) {
 ; ASM: #   TypeLeafKind: LF_POINTER (0x1002)
 ; ASM: #   PointeeType: const int (0x1003)
@@ -427,15 +425,15 @@
 ; ASM: #   IsThisPtr&&: 0
 ; ASM: #   SizeOf: 8
 ; ASM: # }
-; ASM: .short	22
-; ASM: .short	5381
-; ASM: .short	0
-; ASM: .short	128
-; ASM: .long	0
-; ASM: .long	0
-; ASM: .long	0
-; ASM: .short	0
-; ASM: .asciz	"A"
+; ASM: .short	22                      # Record length
+; ASM: .short	5381                    # Record kind: LF_STRUCTURE
+; ASM: .short	0                       # MemberCount
+; ASM: .short	128                     # Properties
+; ASM: .long	0                       # FieldList
+; ASM: .long	0                       # DerivedFrom
+; ASM: .long	0                       # VShape
+; ASM: .short	0                       # SizeOf
+; ASM: .asciz	"A"                     # Name
 ; ASM: # Struct (0x1005) {
 ; ASM: #   TypeLeafKind: LF_STRUCTURE (0x1505)
 ; ASM: #   MemberCount: 0
@@ -448,12 +446,12 @@
 ; ASM: #   SizeOf: 0
 ; ASM: #   Name: A
 ; ASM: # }
-; ASM: .short	18
-; ASM: .short	4098
-; ASM: .long	116
-; ASM: .long	32844
-; ASM: .long	4101
-; ASM: .short	4
+; ASM: .short	18                      # Record length
+; ASM: .short	4098                    # Record kind: LF_POINTER
+; ASM: .long	116                     # PointeeType
+; ASM: .long	32844                   # Attributes
+; ASM: .long	4101                    # ClassType
+; ASM: .short	4                       # Representation
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # Pointer (0x1006) {
@@ -472,10 +470,10 @@
 ; ASM: #   ClassType: A (0x1005)
 ; ASM: #   Representation: GeneralData (0x4)
 ; ASM: # }
-; ASM: .short	10
-; ASM: .short	4098
-; ASM: .long	4101
-; ASM: .long	66572
+; ASM: .short	10                      # Record length
+; ASM: .short	4098                    # Record kind: LF_POINTER
+; ASM: .long	4101                    # PointeeType
+; ASM: .long	66572                   # Attributes
 ; ASM: # Pointer (0x1007) {
 ; ASM: #   TypeLeafKind: LF_POINTER (0x1002)
 ; ASM: #   PointeeType: A (0x1005)
@@ -490,25 +488,25 @@
 ; ASM: #   IsThisPtr&&: 0
 ; ASM: #   SizeOf: 8
 ; ASM: # }
-; ASM: .short	6
-; ASM: .short	4609
-; ASM: .long	0
+; ASM: .short	6                       # Record length
+; ASM: .short	4609                    # Record kind: LF_ARGLIST
+; ASM: .long	0                       # NumArgs
 ; ASM: # ArgList (0x1008) {
 ; ASM: #   TypeLeafKind: LF_ARGLIST (0x1201)
 ; ASM: #   NumArgs: 0
 ; ASM: #   Arguments [
 ; ASM: #   ]
 ; ASM: # }
-; ASM: .short	26
-; ASM: .short	4105
-; ASM: .long	3
-; ASM: .long	4101
-; ASM: .long	4103
-; ASM: .byte	0
-; ASM: .byte	0
-; ASM: .short	0
-; ASM: .long	4104
-; ASM: .long	0
+; ASM: .short	26                      # Record length
+; ASM: .short	4105                    # Record kind: LF_MFUNCTION
+; ASM: .long	3                       # ReturnType
+; ASM: .long	4101                    # ClassType
+; ASM: .long	4103                    # ThisType
+; ASM: .byte	0                       # CallingConvention
+; ASM: .byte	0                       # FunctionOptions
+; ASM: .short	0                       # NumParameters
+; ASM: .long	4104                    # ArgListType
+; ASM: .long	0                       # ThisAdjustment
 ; ASM: # MemberFunction (0x1009) {
 ; ASM: #   TypeLeafKind: LF_MFUNCTION (0x1009)
 ; ASM: #   ReturnType: void (0x3)
@@ -521,8 +519,8 @@
 ; ASM: #   ArgListType: () (0x1008)
 ; ASM: #   ThisAdjustment: 0
 ; ASM: # }
-; ASM: .short	30
-; ASM: .short	4611
+; ASM: .short	30                      # Record length
+; ASM: .short	4611                    # Record kind: LF_FIELDLIST
 ; ASM: .byte	0x0d, 0x15, 0x03, 0x00
 ; ASM: .byte	0x74, 0x00, 0x00, 0x00
 ; ASM: .byte	0x00, 0x00, 0x61, 0x00
@@ -546,15 +544,15 @@
 ; ASM: #     Name: A::f
 ; ASM: #   }
 ; ASM: # }
-; ASM: .short	22
-; ASM: .short	5381
-; ASM: .short	2
-; ASM: .short	0
-; ASM: .long	4106
-; ASM: .long	0
-; ASM: .long	0
-; ASM: .short	4
-; ASM: .asciz	"A"
+; ASM: .short	22                      # Record length
+; ASM: .short	5381                    # Record kind: LF_STRUCTURE
+; ASM: .short	2                       # MemberCount
+; ASM: .short	0                       # Properties
+; ASM: .long	4106                    # FieldList
+; ASM: .long	0                       # DerivedFrom
+; ASM: .long	0                       # VShape
+; ASM: .short	4                       # SizeOf
+; ASM: .asciz	"A"                     # Name
 ; ASM: # Struct (0x100B) {
 ; ASM: #   TypeLeafKind: LF_STRUCTURE (0x1505)
 ; ASM: #   MemberCount: 2
@@ -566,32 +564,32 @@
 ; ASM: #   SizeOf: 4
 ; ASM: #   Name: A
 ; ASM: # }
-; ASM: .short	30
-; ASM: .short	5637
-; ASM: .long	0
-; ASM: .asciz	"D:\\src\\llvm\\build\\t.cpp"
+; ASM: .short	30                      # Record length
+; ASM: .short	5637                    # Record kind: LF_STRING_ID
+; ASM: .long	0                       # Id
+; ASM: .asciz	"D:\\src\\llvm\\build\\t.cpp" # StringData
 ; ASM: # StringId (0x100C) {
 ; ASM: #   TypeLeafKind: LF_STRING_ID (0x1605)
 ; ASM: #   Id: 0x0
 ; ASM: #   StringData: D:\src\llvm\build\t.cpp
 ; ASM: # }
-; ASM: .short	14
-; ASM: .short	5638
-; ASM: .long	4107
-; ASM: .long	4108
-; ASM: .long	1
+; ASM: .short	14                      # Record length
+; ASM: .short	5638                    # Record kind: LF_UDT_SRC_LINE
+; ASM: .long	4107                    # UDT
+; ASM: .long	4108                    # SourceFile
+; ASM: .long	1                       # LineNumber
 ; ASM: # UdtSourceLine (0x100D) {
 ; ASM: #   TypeLeafKind: LF_UDT_SRC_LINE (0x1606)
 ; ASM: #   UDT: A (0x100B)
 ; ASM: #   SourceFile: D:\src\llvm\build\t.cpp (0x100C)
 ; ASM: #   LineNumber: 1
 ; ASM: # }
-; ASM: .short	18
-; ASM: .short	4098
-; ASM: .long	4105
-; ASM: .long	65644
-; ASM: .long	4101
-; ASM: .short	8
+; ASM: .short	18                      # Record length
+; ASM: .short	4098                    # Record kind: LF_POINTER
+; ASM: .long	4105                    # PointeeType
+; ASM: .long	65644                   # Attributes
+; ASM: .long	4101                    # ClassType
+; ASM: .short	8                       # Representation
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # Pointer (0x100E) {
@@ -610,10 +608,10 @@
 ; ASM: #   ClassType: A (0x1005)
 ; ASM: #   Representation: GeneralFunction (0x8)
 ; ASM: # }
-; ASM: .short	10
-; ASM: .short	4097
-; ASM: .long	3
-; ASM: .short	1
+; ASM: .short	10                      # Record length
+; ASM: .short	4097                    # Record kind: LF_MODIFIER
+; ASM: .long	3                       # ModifiedType
+; ASM: .short	1                       # Modifiers
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # Modifier (0x100F) {
@@ -623,10 +621,10 @@
 ; ASM: #     Const (0x1)
 ; ASM: #   ]
 ; ASM: # }
-; ASM: .short	10
-; ASM: .short	4098
-; ASM: .long	4111
-; ASM: .long	65548
+; ASM: .short	10                      # Record length
+; ASM: .short	4098                    # Record kind: LF_POINTER
+; ASM: .long	4111                    # PointeeType
+; ASM: .long	65548                   # Attributes
 ; ASM: # Pointer (0x1010) {
 ; ASM: #   TypeLeafKind: LF_POINTER (0x1002)
 ; ASM: #   PointeeType: const void (0x100F)
@@ -641,13 +639,13 @@
 ; ASM: #   IsThisPtr&&: 0
 ; ASM: #   SizeOf: 8
 ; ASM: # }
-; ASM: .short	14
-; ASM: .short	4104
-; ASM: .long	3
-; ASM: .byte	0
-; ASM: .byte	0
-; ASM: .short	0
-; ASM: .long	4104
+; ASM: .short	14                      # Record length
+; ASM: .short	4104                    # Record kind: LF_PROCEDURE
+; ASM: .long	3                       # ReturnType
+; ASM: .byte	0                       # CallingConvention
+; ASM: .byte	0                       # FunctionOptions
+; ASM: .short	0                       # NumParameters
+; ASM: .long	4104                    # ArgListType
 ; ASM: # Procedure (0x1011) {
 ; ASM: #   TypeLeafKind: LF_PROCEDURE (0x1008)
 ; ASM: #   ReturnType: void (0x3)
@@ -657,11 +655,11 @@
 ; ASM: #   NumParameters: 0
 ; ASM: #   ArgListType: () (0x1008)
 ; ASM: # }
-; ASM: .short	22
-; ASM: .short	5633
-; ASM: .long	0
-; ASM: .long	4113
-; ASM: .asciz	"CharTypes"
+; ASM: .short	22                      # Record length
+; ASM: .short	5633                    # Record kind: LF_FUNC_ID
+; ASM: .long	0                       # ParentScope
+; ASM: .long	4113                    # FunctionType
+; ASM: .asciz	"CharTypes"             # Name
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # FuncId (0x1012) {
@@ -670,10 +668,10 @@
 ; ASM: #   FunctionType: void () (0x1011)
 ; ASM: #   Name: CharTypes
 ; ASM: # }
-; ASM: .short	26
-; ASM: .short	5637
-; ASM: .long	0
-; ASM: .asciz	"D:\\src\\llvm\\build"
+; ASM: .short	26                      # Record length
+; ASM: .short	5637                    # Record kind: LF_STRING_ID
+; ASM: .long	0                       # Id
+; ASM: .asciz	"D:\\src\\llvm\\build"  # StringData
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # StringId (0x1013) {
@@ -681,10 +679,10 @@
 ; ASM: #   Id: 0x0
 ; ASM: #   StringData: D:\src\llvm\build
 ; ASM: # }
-; ASM: .short	14
-; ASM: .short	5637
-; ASM: .long	0
-; ASM: .asciz	"t.cpp"
+; ASM: .short	14                      # Record length
+; ASM: .short	5637                    # Record kind: LF_STRING_ID
+; ASM: .long	0                       # Id
+; ASM: .asciz	"t.cpp"                 # StringData
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # StringId (0x1014) {
@@ -692,14 +690,14 @@
 ; ASM: #   Id: 0x0
 ; ASM: #   StringData: t.cpp
 ; ASM: # }
-; ASM: .short	26
-; ASM: .short	5635
-; ASM: .short	5
-; ASM: .long	4115
-; ASM: .long	0
-; ASM: .long	4116
-; ASM: .long	0
-; ASM: .long	0
+; ASM: .short	26                      # Record length
+; ASM: .short	5635                    # Record kind: LF_BUILDINFO
+; ASM: .short	5                       # NumArgs
+; ASM: .long	4115                    # Argument
+; ASM: .long	0                       # Argument
+; ASM: .long	4116                    # Argument
+; ASM: .long	0                       # Argument
+; ASM: .long	0                       # Argument
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # BuildInfo (0x1015) {

From 4a09a73fb091e9100fb95aa364a7afb8daa042dc Mon Sep 17 00:00:00 2001
From: Hideto Ueno <uenoku.tokotoko@gmail.com>
Date: Wed, 17 Jul 2019 21:11:02 +0000
Subject: [PATCH 391/451] [Attributor][NFC] Remove unnecessary debug output

llvm-svn: 366373
---
 llvm/lib/Transforms/IPO/Attributor.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 2f31b3924a9a2..2a52c6b9b4ad9 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1332,7 +1332,6 @@ bool Attributor::checkForAllCallSites(Function &F,
   for (const Use &U : F.uses()) {
 
     CallSite CS(U.getUser());
-    dbgs() << *CS.getInstruction() << "\n";
     if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) {
       if (!RequireAllCallSites)
         continue;

From eece5a9d31840de69a074ed1ea3ed7c0e44916a0 Mon Sep 17 00:00:00 2001
From: Dominik Adamski <adamski.dominik@gmail.com>
Date: Wed, 17 Jul 2019 21:25:18 +0000
Subject: [PATCH 392/451] [NFC][ScopBuilder] Move finalizeAccesses and its
 callees to ScopBuilder

Scope of changes:
1) Moved finalizeAccesses to ScopBuilder
2) Moved updateAccessDimensionality to ScopBuilder
3) Moved foldSizeConstantsToRight to ScopBuilder
4) Moved foldAccessRelations to ScopBuilder
5) Moved assumeNoOutOfBounds to ScopBuilder
6) Moved markFortranArrays to ScopBuilder
7) Added iterator range for AccessFunctions vector.

Differential Revision: https://reviews.llvm.org/D63794

llvm-svn: 366374
---
 polly/include/polly/ScopBuilder.h  |  63 ++++++++++
 polly/include/polly/ScopInfo.h     |  69 +----------
 polly/lib/Analysis/ScopBuilder.cpp | 191 ++++++++++++++++++++++++++++-
 polly/lib/Analysis/ScopInfo.cpp    | 189 ----------------------------
 4 files changed, 259 insertions(+), 253 deletions(-)

diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h
index c5cbcf8a8ab42..a6c8c0eb893be 100644
--- a/polly/include/polly/ScopBuilder.h
+++ b/polly/include/polly/ScopBuilder.h
@@ -178,6 +178,69 @@ class ScopBuilder {
   /// @param Stmt       The parent statement of the instruction
   void buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt);
 
+  /// Finalize all access relations.
+  ///
+  /// When building up access relations, temporary access relations that
+  /// correctly represent each individual access are constructed. However, these
+  /// access relations can be inconsistent or non-optimal when looking at the
+  /// set of accesses as a whole. This function finalizes the memory accesses
+  /// and constructs a globally consistent state.
+  void finalizeAccesses();
+
+  /// Update access dimensionalities.
+  ///
+  /// When detecting memory accesses different accesses to the same array may
+  /// have built with different dimensionality, as outer zero-values dimensions
+  /// may not have been recognized as separate dimensions. This function goes
+  /// again over all memory accesses and updates their dimensionality to match
+  /// the dimensionality of the underlying ScopArrayInfo object.
+  void updateAccessDimensionality();
+
+  /// Fold size constants to the right.
+  ///
+  /// In case all memory accesses in a given dimension are multiplied with a
+  /// common constant, we can remove this constant from the individual access
+  /// functions and move it to the size of the memory access. We do this as this
+  /// increases the size of the innermost dimension, consequently widens the
+  /// valid range the array subscript in this dimension can evaluate to, and
+  /// as a result increases the likelihood that our delinearization is
+  /// correct.
+  ///
+  /// Example:
+  ///
+  ///    A[][n]
+  ///    S[i,j] -> A[2i][2j+1]
+  ///    S[i,j] -> A[2i][2j]
+  ///
+  ///    =>
+  ///
+  ///    A[][2n]
+  ///    S[i,j] -> A[i][2j+1]
+  ///    S[i,j] -> A[i][2j]
+  ///
+  /// Constants in outer dimensions can arise when the elements of a parametric
+  /// multi-dimensional array are not elementary data types, but e.g.,
+  /// structures.
+  void foldSizeConstantsToRight();
+
+  /// Fold memory accesses to handle parametric offset.
+  ///
+  /// As a post-processing step, we 'fold' memory accesses to parametric
+  /// offsets in the access functions. @see MemoryAccess::foldAccess for
+  /// details.
+  void foldAccessRelations();
+
+  /// Assume that all memory accesses are within bounds.
+  ///
+  /// After we have built a model of all memory accesses, we need to assume
+  /// that the model we built matches reality -- aka. all modeled memory
+  /// accesses always remain within bounds. We do this as last step, after
+  /// all memory accesses have been modeled and canonicalized.
+  void assumeNoOutOfBounds();
+
+  /// Mark arrays that have memory accesses with FortranArrayDescriptor.
+  void markFortranArrays();
+
   /// Build the alias checks for this SCoP.
   bool buildAliasChecks();
 
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index e76442aadcefd..0e523bc9dee31 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -2090,57 +2090,6 @@ class Scop {
   void addScopStmt(Region *R, StringRef Name, Loop *SurroundingLoop,
                    std::vector<Instruction *> EntryBlockInstructions);
 
-  /// Update access dimensionalities.
-  ///
-  /// When detecting memory accesses different accesses to the same array may
-  /// have built with different dimensionality, as outer zero-values dimensions
-  /// may not have been recognized as separate dimensions. This function goes
-  /// again over all memory accesses and updates their dimensionality to match
-  /// the dimensionality of the underlying ScopArrayInfo object.
-  void updateAccessDimensionality();
-
-  /// Fold size constants to the right.
-  ///
-  /// In case all memory accesses in a given dimension are multiplied with a
-  /// common constant, we can remove this constant from the individual access
-  /// functions and move it to the size of the memory access. We do this as this
-  /// increases the size of the innermost dimension, consequently widens the
-  /// valid range the array subscript in this dimension can evaluate to, and
-  /// as a result increases the likelihood that our delinearization is
-  /// correct.
-  ///
-  /// Example:
-  ///
-  ///    A[][n]
-  ///    S[i,j] -> A[2i][2j+1]
-  ///    S[i,j] -> A[2i][2j]
-  ///
-  ///    =>
-  ///
-  ///    A[][2n]
-  ///    S[i,j] -> A[i][2j+1]
-  ///    S[i,j] -> A[i][2j]
-  ///
-  /// Constants in outer dimensions can arise when the elements of a parametric
-  /// multi-dimensional array are not elementary data types, but e.g.,
-  /// structures.
-  void foldSizeConstantsToRight();
-
-  /// Fold memory accesses to handle parametric offset.
-  ///
-  /// As a post-processing step, we 'fold' memory accesses to parametric
-  /// offsets in the access functions. @see MemoryAccess::foldAccess for
-  /// details.
-  void foldAccessRelations();
-
-  /// Assume that all memory accesses are within bounds.
-  ///
-  /// After we have built a model of all memory accesses, we need to assume
-  /// that the model we built matches reality -- aka. all modeled memory
-  /// accesses always remain within bounds. We do this as last step, after
-  /// all memory accesses have been modeled and canonicalized.
-  void assumeNoOutOfBounds();
-
   /// Remove statements from the list of scop statements.
   ///
   /// @param ShouldDelete  A function that returns true if the statement passed
@@ -2160,18 +2109,6 @@ class Scop {
   /// have a corresponding domain in the domain map (or it is empty).
   void removeStmtNotInDomainMap();
 
-  /// Mark arrays that have memory accesses with FortranArrayDescriptor.
-  void markFortranArrays();
-
-  /// Finalize all access relations.
-  ///
-  /// When building up access relations, temporary access relations that
-  /// correctly represent each individual access are constructed. However, these
-  /// access relations can be inconsistent or non-optimal when looking at the
-  /// set of accesses as a whole. This function finalizes the memory accesses
-  /// and constructs a globally consistent state.
-  void finalizeAccesses();
-
   /// Construct the schedule of this SCoP.
   ///
   /// @param LI The LoopInfo for the current function.
@@ -2348,6 +2285,12 @@ class Scop {
     return make_range(RecordedAssumptions.begin(), RecordedAssumptions.end());
   }
 
+  /// Return an iterator range containing all the MemoryAccess objects of the
+  /// Scop.
+  iterator_range<AccFuncVector::iterator> access_functions() {
+    return make_range(AccessFunctions.begin(), AccessFunctions.end());
+  }
+
   /// Return whether this scop is empty, i.e. contains no statements that
   /// could be executed.
   bool isEmpty() const { return Stmts.empty(); }
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index ec078be083850..b2fdd0731be31 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -1154,6 +1154,195 @@ void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
     MemAccess->setFortranArrayDescriptor(FAD);
 }
 
+/// Check if @p Expr is divisible by @p Size.
+static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
+  assert(Size != 0);
+  if (Size == 1)
+    return true;
+
+  // Only one factor needs to be divisible.
+  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
+    for (auto *FactorExpr : MulExpr->operands())
+      if (isDivisible(FactorExpr, Size, SE))
+        return true;
+    return false;
+  }
+
+  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
+  // to be divisible.
+  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
+    for (auto *OpExpr : NAryExpr->operands())
+      if (!isDivisible(OpExpr, Size, SE))
+        return false;
+    return true;
+  }
+
+  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
+  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
+  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
+  return MulSCEV == Expr;
+}
+
+void ScopBuilder::foldSizeConstantsToRight() {
+  isl::union_set Accessed = scop->getAccesses().range();
+
+  for (auto Array : scop->arrays()) {
+    if (Array->getNumberOfDimensions() <= 1)
+      continue;
+
+    isl::space Space = Array->getSpace();
+    Space = Space.align_params(Accessed.get_space());
+
+    if (!Accessed.contains(Space))
+      continue;
+
+    isl::set Elements = Accessed.extract_set(Space);
+    isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
+
+    std::vector<int> Int;
+    int Dims = Elements.dim(isl::dim::set);
+    for (int i = 0; i < Dims; i++) {
+      isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
+      DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
+      DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
+
+      isl::basic_set DimHull = DimOnly.affine_hull();
+
+      if (i == Dims - 1) {
+        Int.push_back(1);
+        Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
+        continue;
+      }
+
+      if (DimHull.dim(isl::dim::div) == 1) {
+        isl::aff Diff = DimHull.get_div(0);
+        isl::val Val = Diff.get_denominator_val();
+
+        int ValInt = 1;
+        if (Val.is_int()) {
+          auto ValAPInt = APIntFromVal(Val);
+          if (ValAPInt.isSignedIntN(32))
+            ValInt = ValAPInt.getSExtValue();
+        } else {
+        }
+
+        Int.push_back(ValInt);
+        isl::constraint C = isl::constraint::alloc_equality(
+            isl::local_space(Transform.get_space()));
+        C = C.set_coefficient_si(isl::dim::out, i, ValInt);
+        C = C.set_coefficient_si(isl::dim::in, i, -1);
+        Transform = Transform.add_constraint(C);
+        continue;
+      }
+
+      isl::basic_set ZeroSet = isl::basic_set(DimHull);
+      ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
+
+      int ValInt = 1;
+      if (ZeroSet.is_equal(DimHull)) {
+        ValInt = 0;
+      }
+
+      Int.push_back(ValInt);
+      Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
+    }
+
+    isl::set MappedElements = isl::map(Transform).domain();
+    if (!Elements.is_subset(MappedElements))
+      continue;
+
+    bool CanFold = true;
+    if (Int[0] <= 1)
+      CanFold = false;
+
+    unsigned NumDims = Array->getNumberOfDimensions();
+    for (unsigned i = 1; i < NumDims - 1; i++)
+      if (Int[0] != Int[i] && Int[i])
+        CanFold = false;
+
+    if (!CanFold)
+      continue;
+
+    for (auto &Access : scop->access_functions())
+      if (Access->getScopArrayInfo() == Array)
+        Access->setAccessRelation(
+            Access->getAccessRelation().apply_range(Transform));
+
+    std::vector<const SCEV *> Sizes;
+    for (unsigned i = 0; i < NumDims; i++) {
+      auto Size = Array->getDimensionSize(i);
+
+      if (i == NumDims - 1)
+        Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
+      Sizes.push_back(Size);
+    }
+
+    Array->updateSizes(Sizes, false /* CheckConsistency */);
+  }
+}
+
+void ScopBuilder::markFortranArrays() {
+  for (ScopStmt &Stmt : *scop) {
+    for (MemoryAccess *MemAcc : Stmt) {
+      Value *FAD = MemAcc->getFortranArrayDescriptor();
+      if (!FAD)
+        continue;
+
+      // TODO: const_cast-ing to edit
+      ScopArrayInfo *SAI =
+          const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
+      assert(SAI && "memory access into a Fortran array does not "
+                    "have an associated ScopArrayInfo");
+      SAI->applyAndSetFAD(FAD);
+    }
+  }
+}
+
+void ScopBuilder::finalizeAccesses() {
+  updateAccessDimensionality();
+  foldSizeConstantsToRight();
+  foldAccessRelations();
+  assumeNoOutOfBounds();
+  markFortranArrays();
+}
+
+void ScopBuilder::updateAccessDimensionality() {
+  // Check all array accesses for each base pointer and find a (virtual) element
+  // size for the base pointer that divides all access functions.
+  for (ScopStmt &Stmt : *scop)
+    for (MemoryAccess *Access : Stmt) {
+      if (!Access->isArrayKind())
+        continue;
+      ScopArrayInfo *Array =
+          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
+
+      if (Array->getNumberOfDimensions() != 1)
+        continue;
+      unsigned DivisibleSize = Array->getElemSizeInBytes();
+      const SCEV *Subscript = Access->getSubscript(0);
+      while (!isDivisible(Subscript, DivisibleSize, SE))
+        DivisibleSize /= 2;
+      auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
+      Array->updateElementType(Ty);
+    }
+
+  for (auto &Stmt : *scop)
+    for (auto &Access : Stmt)
+      Access->updateDimensionality();
+}
+
+void ScopBuilder::foldAccessRelations() {
+  for (auto &Stmt : *scop)
+    for (auto &Access : Stmt)
+      Access->foldAccessRelation();
+}
+
+void ScopBuilder::assumeNoOutOfBounds() {
+  for (auto &Stmt : *scop)
+    for (auto &Access : Stmt)
+      Access->assumeNoOutOfBound();
+}
+
 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
   // Find the statement that defines the value of Inst. That statement has to
   // write the value to make it available to those statements that read it.
@@ -2367,7 +2556,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
 
   scop->buildSchedule(LI);
 
-  scop->finalizeAccesses();
+  finalizeAccesses();
 
   scop->realignParams();
   addUserContext();
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 9ccd9bf1680fd..c9a780ad987ca 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -848,35 +848,6 @@ void MemoryAccess::foldAccessRelation() {
   }
 }
 
-/// Check if @p Expr is divisible by @p Size.
-static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
-  assert(Size != 0);
-  if (Size == 1)
-    return true;
-
-  // Only one factor needs to be divisible.
-  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
-    for (auto *FactorExpr : MulExpr->operands())
-      if (isDivisible(FactorExpr, Size, SE))
-        return true;
-    return false;
-  }
-
-  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
-  // to be divisible.
-  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
-    for (auto *OpExpr : NAryExpr->operands())
-      if (!isDivisible(OpExpr, Size, SE))
-        return false;
-    return true;
-  }
-
-  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
-  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
-  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
-  return MulSCEV == Expr;
-}
-
 void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
   assert(AccessRelation.is_null() && "AccessRelation already built");
 
@@ -2836,166 +2807,6 @@ Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
 
 Scop::~Scop() = default;
 
-void Scop::foldSizeConstantsToRight() {
-  isl::union_set Accessed = getAccesses().range();
-
-  for (auto Array : arrays()) {
-    if (Array->getNumberOfDimensions() <= 1)
-      continue;
-
-    isl::space Space = Array->getSpace();
-    Space = Space.align_params(Accessed.get_space());
-
-    if (!Accessed.contains(Space))
-      continue;
-
-    isl::set Elements = Accessed.extract_set(Space);
-    isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
-
-    std::vector<int> Int;
-    int Dims = Elements.dim(isl::dim::set);
-    for (int i = 0; i < Dims; i++) {
-      isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
-      DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
-      DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
-
-      isl::basic_set DimHull = DimOnly.affine_hull();
-
-      if (i == Dims - 1) {
-        Int.push_back(1);
-        Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
-        continue;
-      }
-
-      if (DimHull.dim(isl::dim::div) == 1) {
-        isl::aff Diff = DimHull.get_div(0);
-        isl::val Val = Diff.get_denominator_val();
-
-        int ValInt = 1;
-        if (Val.is_int()) {
-          auto ValAPInt = APIntFromVal(Val);
-          if (ValAPInt.isSignedIntN(32))
-            ValInt = ValAPInt.getSExtValue();
-        } else {
-        }
-
-        Int.push_back(ValInt);
-        isl::constraint C = isl::constraint::alloc_equality(
-            isl::local_space(Transform.get_space()));
-        C = C.set_coefficient_si(isl::dim::out, i, ValInt);
-        C = C.set_coefficient_si(isl::dim::in, i, -1);
-        Transform = Transform.add_constraint(C);
-        continue;
-      }
-
-      isl::basic_set ZeroSet = isl::basic_set(DimHull);
-      ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
-
-      int ValInt = 1;
-      if (ZeroSet.is_equal(DimHull)) {
-        ValInt = 0;
-      }
-
-      Int.push_back(ValInt);
-      Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
-    }
-
-    isl::set MappedElements = isl::map(Transform).domain();
-    if (!Elements.is_subset(MappedElements))
-      continue;
-
-    bool CanFold = true;
-    if (Int[0] <= 1)
-      CanFold = false;
-
-    unsigned NumDims = Array->getNumberOfDimensions();
-    for (unsigned i = 1; i < NumDims - 1; i++)
-      if (Int[0] != Int[i] && Int[i])
-        CanFold = false;
-
-    if (!CanFold)
-      continue;
-
-    for (auto &Access : AccessFunctions)
-      if (Access->getScopArrayInfo() == Array)
-        Access->setAccessRelation(
-            Access->getAccessRelation().apply_range(Transform));
-
-    std::vector<const SCEV *> Sizes;
-    for (unsigned i = 0; i < NumDims; i++) {
-      auto Size = Array->getDimensionSize(i);
-
-      if (i == NumDims - 1)
-        Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
-      Sizes.push_back(Size);
-    }
-
-    Array->updateSizes(Sizes, false /* CheckConsistency */);
-  }
-}
-
-void Scop::markFortranArrays() {
-  for (ScopStmt &Stmt : Stmts) {
-    for (MemoryAccess *MemAcc : Stmt) {
-      Value *FAD = MemAcc->getFortranArrayDescriptor();
-      if (!FAD)
-        continue;
-
-      // TODO: const_cast-ing to edit
-      ScopArrayInfo *SAI =
-          const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
-      assert(SAI && "memory access into a Fortran array does not "
-                    "have an associated ScopArrayInfo");
-      SAI->applyAndSetFAD(FAD);
-    }
-  }
-}
-
-void Scop::finalizeAccesses() {
-  updateAccessDimensionality();
-  foldSizeConstantsToRight();
-  foldAccessRelations();
-  assumeNoOutOfBounds();
-  markFortranArrays();
-}
-
-void Scop::updateAccessDimensionality() {
-  // Check all array accesses for each base pointer and find a (virtual) element
-  // size for the base pointer that divides all access functions.
-  for (ScopStmt &Stmt : *this)
-    for (MemoryAccess *Access : Stmt) {
-      if (!Access->isArrayKind())
-        continue;
-      ScopArrayInfo *Array =
-          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
-
-      if (Array->getNumberOfDimensions() != 1)
-        continue;
-      unsigned DivisibleSize = Array->getElemSizeInBytes();
-      const SCEV *Subscript = Access->getSubscript(0);
-      while (!isDivisible(Subscript, DivisibleSize, *SE))
-        DivisibleSize /= 2;
-      auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
-      Array->updateElementType(Ty);
-    }
-
-  for (auto &Stmt : *this)
-    for (auto &Access : Stmt)
-      Access->updateDimensionality();
-}
-
-void Scop::foldAccessRelations() {
-  for (auto &Stmt : *this)
-    for (auto &Access : Stmt)
-      Access->foldAccessRelation();
-}
-
-void Scop::assumeNoOutOfBounds() {
-  for (auto &Stmt : *this)
-    for (auto &Access : Stmt)
-      Access->assumeNoOutOfBound();
-}
-
 void Scop::removeFromStmtMap(ScopStmt &Stmt) {
   for (Instruction *Inst : Stmt.getInstructions())
     InstStmtMap.erase(Inst);

From f45fd429b7184845f7f6567227df32ebb94a04af Mon Sep 17 00:00:00 2001
From: Evgeniy Stepanov <eugeni.stepanov@gmail.com>
Date: Wed, 17 Jul 2019 21:27:44 +0000
Subject: [PATCH 393/451] Speculative fix for stack-tagging.ll failure.

Depending on the evaluation order of function call arguments,
the current code may insert a use before def.

llvm-svn: 366375
---
 llvm/lib/Target/AArch64/AArch64StackTagging.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index f27a6311365af..6e99c48bf1d7b 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -316,8 +316,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
       untagAlloca(AI, Info.LifetimeEnd[0], Size);
     } else {
       uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
-      tagAlloca(AI, TagPCall->getNextNode(),
-                IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy()), Size);
+      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
+      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
       for (auto &RI : RetVec) {
         untagAlloca(AI, RI, Size);
       }

From 9c7f4264d352316cd4213f78c63c43d830b95752 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Wed, 17 Jul 2019 21:35:11 +0000
Subject: [PATCH 394/451] [AMDGPU] Stop special casing flat_scratch for
 register name

Differential Revision: https://reviews.llvm.org/D64885

llvm-svn: 366376
---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 12 ------------
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td  |  2 +-
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 483793fe4dcbb..f152deb280041 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1347,18 +1347,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 }
 
 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
-  // FIXME: Rename flat_scr so we don't need to special case this.
-  switch (Reg) {
-  case AMDGPU::FLAT_SCR:
-    return "flat_scratch";
-  case AMDGPU::FLAT_SCR_LO:
-    return "flat_scratch_lo";
-  case AMDGPU::FLAT_SCR_HI:
-    return "flat_scratch_hi";
-  default:
-    break;
-  }
-
   const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg);
   unsigned Size = getRegSizeInBits(*RC);
   unsigned AltName = AMDGPU::NoRegAltName;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 353347073b877..59a2d89ca2c2c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -171,7 +171,7 @@ foreach Index = 0-15 in {
 multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
   def _ci : SIReg<n, ci_e>;
   def _vi : SIReg<n, vi_e>;
-  def "" : SIReg<"", 0>;
+  def "" : SIReg<n, 0>;
 }
 
 class FlatReg <Register lo, Register hi, bits<16> encoding> :

From d0ac007f9a93d577f3e829812444bdae8b26269d Mon Sep 17 00:00:00 2001
From: Dominik Adamski <adamski.dominik@gmail.com>
Date: Wed, 17 Jul 2019 21:42:39 +0000
Subject: [PATCH 395/451] [NFC][ScopBuilder] Move buildSchedule and its callees
 to ScopBuilder or ScopHelper

Scope of changes:
1. Moved buildSchedule functions to ScopBuilder.
2. Moved combineInSequence function to ScopBuilder.
3. Moved mapToDimension function to ScopBuilder.
4. Moved LoopStackTy to ScopBuilder.
5. Moved getLoopSurroundingScop to ScopHelper.
6. Moved getNumBlocksInLoop to ScopHelper.
7. Moved getNumBlocksInRegionNode to ScopHelper.
8. Moved getRegionNodeLoop to ScopHelper.

Differential Revision: https://reviews.llvm.org/D64223

llvm-svn: 366377
---
 polly/include/polly/ScopBuilder.h        |  56 +++++
 polly/include/polly/ScopInfo.h           |  60 ------
 polly/include/polly/Support/ScopHelper.h |  22 ++
 polly/lib/Analysis/ScopBuilder.cpp       | 176 ++++++++++++++-
 polly/lib/Analysis/ScopInfo.cpp          | 259 -----------------------
 polly/lib/Support/ScopHelper.cpp         |  74 +++++++
 6 files changed, 327 insertions(+), 320 deletions(-)

diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h
index a6c8c0eb893be..415190b5dfda9 100644
--- a/polly/include/polly/ScopBuilder.h
+++ b/polly/include/polly/ScopBuilder.h
@@ -584,6 +584,62 @@ class ScopBuilder {
   /// have been hoisted as loop invariant.
   void canonicalizeDynamicBasePtrs();
 
+  /// Construct the schedule of this SCoP.
+  void buildSchedule();
+
+  /// A loop stack element to keep track of per-loop information during
+  ///        schedule construction.
+  using LoopStackElementTy = struct LoopStackElement {
+    // The loop for which we keep information.
+    Loop *L;
+
+    // The (possibly incomplete) schedule for this loop.
+    isl::schedule Schedule;
+
+    // The number of basic blocks in the current loop, for which a schedule has
+    // already been constructed.
+    unsigned NumBlocksProcessed;
+
+    LoopStackElement(Loop *L, isl::schedule S, unsigned NumBlocksProcessed)
+        : L(L), Schedule(S), NumBlocksProcessed(NumBlocksProcessed) {}
+  };
+
+  /// The loop stack used for schedule construction.
+  ///
+  /// The loop stack keeps track of schedule information for a set of nested
+  /// loops as well as an (optional) 'nullptr' loop that models the outermost
+  /// schedule dimension. The loops in a loop stack always have a parent-child
+  /// relation where the loop at position n is the parent of the loop at
+  /// position n + 1.
+  using LoopStackTy = SmallVector<LoopStackElementTy, 4>;
+
+  /// Construct schedule information for a given Region and add the
+  ///        derived information to @p LoopStack.
+  ///
+  /// Given a Region we derive schedule information for all RegionNodes
+  /// contained in this region ensuring that the assigned execution times
+  /// correctly model the existing control flow relations.
+  ///
+  /// @param R              The region which to process.
+  /// @param LoopStack      A stack of loops that are currently under
+  ///                       construction.
+  void buildSchedule(Region *R, LoopStackTy &LoopStack);
+
+  /// Build Schedule for the region node @p RN and add the derived
+  ///        information to @p LoopStack.
+  ///
+  /// In case @p RN is a BasicBlock or a non-affine Region, we construct the
+  /// schedule for this @p RN and also finalize loop schedules in case the
+  /// current @p RN completes the loop.
+  ///
+  /// In case @p RN is a not-non-affine Region, we delegate the construction to
+  /// buildSchedule(Region *R, ...).
+  ///
+  /// @param RN             The RegionNode region traversed.
+  /// @param LoopStack      A stack of loops that are currently under
+  ///                       construction.
+  void buildSchedule(RegionNode *RN, LoopStackTy &LoopStack);
+
 public:
   explicit ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
                        const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h
index 0e523bc9dee31..9405d8c26c617 100644
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@@ -2109,66 +2109,6 @@ class Scop {
   /// have a corresponding domain in the domain map (or it is empty).
   void removeStmtNotInDomainMap();
 
-  /// Construct the schedule of this SCoP.
-  ///
-  /// @param LI The LoopInfo for the current function.
-  void buildSchedule(LoopInfo &LI);
-
-  /// A loop stack element to keep track of per-loop information during
-  ///        schedule construction.
-  using LoopStackElementTy = struct LoopStackElement {
-    // The loop for which we keep information.
-    Loop *L;
-
-    // The (possibly incomplete) schedule for this loop.
-    isl::schedule Schedule;
-
-    // The number of basic blocks in the current loop, for which a schedule has
-    // already been constructed.
-    unsigned NumBlocksProcessed;
-
-    LoopStackElement(Loop *L, isl::schedule S, unsigned NumBlocksProcessed)
-        : L(L), Schedule(S), NumBlocksProcessed(NumBlocksProcessed) {}
-  };
-
-  /// The loop stack used for schedule construction.
-  ///
-  /// The loop stack keeps track of schedule information for a set of nested
-  /// loops as well as an (optional) 'nullptr' loop that models the outermost
-  /// schedule dimension. The loops in a loop stack always have a parent-child
-  /// relation where the loop at position n is the parent of the loop at
-  /// position n + 1.
-  using LoopStackTy = SmallVector<LoopStackElementTy, 4>;
-
-  /// Construct schedule information for a given Region and add the
-  ///        derived information to @p LoopStack.
-  ///
-  /// Given a Region we derive schedule information for all RegionNodes
-  /// contained in this region ensuring that the assigned execution times
-  /// correctly model the existing control flow relations.
-  ///
-  /// @param R              The region which to process.
-  /// @param LoopStack      A stack of loops that are currently under
-  ///                       construction.
-  /// @param LI The LoopInfo for the current function.
-  void buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI);
-
-  /// Build Schedule for the region node @p RN and add the derived
-  ///        information to @p LoopStack.
-  ///
-  /// In case @p RN is a BasicBlock or a non-affine Region, we construct the
-  /// schedule for this @p RN and also finalize loop schedules in case the
-  /// current @p RN completes the loop.
-  ///
-  /// In case @p RN is a not-non-affine Region, we delegate the construction to
-  /// buildSchedule(Region *R, ...).
-  ///
-  /// @param RN             The RegionNode region traversed.
-  /// @param LoopStack      A stack of loops that are currently under
-  ///                       construction.
-  /// @param LI The LoopInfo for the current function.
-  void buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI);
-
   /// Collect all memory access relations of a given type.
   ///
   /// @param Predicate A predicate function that returns true if an access is
diff --git a/polly/include/polly/Support/ScopHelper.h b/polly/include/polly/Support/ScopHelper.h
index 02c669a1f3e2d..8d794e0bf7863 100644
--- a/polly/include/polly/Support/ScopHelper.h
+++ b/polly/include/polly/Support/ScopHelper.h
@@ -27,6 +27,7 @@ class Region;
 class Pass;
 class DominatorTree;
 class RegionInfo;
+class RegionNode;
 } // namespace llvm
 
 namespace polly {
@@ -379,6 +380,27 @@ bool isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R,
 /// @return The condition of @p TI and nullptr if none could be extracted.
 llvm::Value *getConditionFromTerminator(llvm::Instruction *TI);
 
+/// Get the smallest loop that contains @p S but is not in @p S.
+llvm::Loop *getLoopSurroundingScop(Scop &S, llvm::LoopInfo &LI);
+
+/// Get the number of blocks in @p L.
+///
+/// The number of blocks in a loop is the number of basic blocks actually
+/// belonging to the loop, as well as all single basic blocks that the loop
+/// exits to and which terminate in an unreachable instruction. We do not
+/// allow such basic blocks in the exit of a scop, hence they belong to the
+/// scop and represent run-time conditions which we want to model and
+/// subsequently speculate away.
+///
+/// @see getRegionNodeLoop for additional details.
+unsigned getNumBlocksInLoop(llvm::Loop *L);
+
+/// Get the number of blocks in @p RN.
+unsigned getNumBlocksInRegionNode(llvm::RegionNode *RN);
+
+/// Return the smallest loop surrounding @p RN.
+llvm::Loop *getRegionNodeLoop(llvm::RegionNode *RN, llvm::LoopInfo &LI);
+
 /// Check if @p LInst can be hoisted in @p R.
 ///
 /// @param LInst The load to check.
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index b2fdd0731be31..d301cfe440138 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -24,6 +24,7 @@
 #include "polly/Support/VirtualInstruction.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Loads.h"
@@ -222,6 +223,179 @@ void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
     ensureValueRead(Op.get(), UserStmt);
 }
 
+// Create a sequence of two schedules. Either argument may be null and is
+// interpreted as the empty schedule. Can also return null if both schedules are
+// empty.
+static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
+  if (!Prev)
+    return Succ;
+  if (!Succ)
+    return Prev;
+
+  return Prev.sequence(Succ);
+}
+
+// Create an isl_multi_union_pw_aff that defines an identity mapping from the
+// elements of USet to their N-th dimension.
+//
+// # Example:
+//
+//            Domain: { A[i,j]; B[i,j,k] }
+//                 N: 1
+//
+// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
+//
+// @param USet   A union set describing the elements for which to generate a
+//               mapping.
+// @param N      The dimension to map to.
+// @returns      A mapping from USet to its N-th dimension.
+static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
+  assert(N >= 0);
+  assert(USet);
+  assert(!USet.is_empty());
+
+  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
+
+  for (isl::set S : USet.get_set_list()) {
+    int Dim = S.dim(isl::dim::set);
+    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
+                                                  N, Dim - N);
+    if (N > 1)
+      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
+
+    Result = Result.add_pw_multi_aff(PMA);
+  }
+
+  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
+}
+
+void ScopBuilder::buildSchedule() {
+  Loop *L = getLoopSurroundingScop(*scop, LI);
+  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
+  buildSchedule(scop->getRegion().getNode(), LoopStack);
+  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
+  scop->setScheduleTree(LoopStack[0].Schedule);
+}
+
+/// To generate a schedule for the elements in a Region we traverse the Region
+/// in reverse-post-order and add the contained RegionNodes in traversal order
+/// to the schedule of the loop that is currently at the top of the LoopStack.
+/// For loop-free codes, this results in a correct sequential ordering.
+///
+/// Example:
+///           bb1(0)
+///         /     \.
+///      bb2(1)   bb3(2)
+///         \    /  \.
+///          bb4(3)  bb5(4)
+///             \   /
+///              bb6(5)
+///
+/// Including loops requires additional processing. Whenever a loop header is
+/// encountered, the corresponding loop is added to the @p LoopStack. Starting
+/// from an empty schedule, we first process all RegionNodes that are within
+/// this loop and complete the sequential schedule at this loop-level before
+/// processing any other nodes. To implement this
+/// loop-nodes-first-processing, the reverse post-order traversal is
+/// insufficient. Hence, we additionally check if the traversal yields
+/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
+/// These region-nodes are then queued and only traversed after all nodes
+/// within the current loop have been processed.
+void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
+  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);
+
+  ReversePostOrderTraversal<Region *> RTraversal(R);
+  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
+  std::deque<RegionNode *> DelayList;
+  bool LastRNWaiting = false;
+
+  // Iterate over the region @p R in reverse post-order but queue
+  // sub-regions/blocks iff they are not part of the last encountered but not
+  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
+  // that we queued the last sub-region/block from the reverse post-order
+  // iterator. If it is set we have to explore the next sub-region/block from
+  // the iterator (if any) to guarantee progress. If it is not set we first try
+  // the next queued sub-region/blocks.
+  while (!WorkList.empty() || !DelayList.empty()) {
+    RegionNode *RN;
+
+    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
+      RN = WorkList.front();
+      WorkList.pop_front();
+      LastRNWaiting = false;
+    } else {
+      RN = DelayList.front();
+      DelayList.pop_front();
+    }
+
+    Loop *L = getRegionNodeLoop(RN, LI);
+    if (!scop->contains(L))
+      L = OuterScopLoop;
+
+    Loop *LastLoop = LoopStack.back().L;
+    if (LastLoop != L) {
+      if (LastLoop && !LastLoop->contains(L)) {
+        LastRNWaiting = true;
+        DelayList.push_back(RN);
+        continue;
+      }
+      LoopStack.push_back({L, nullptr, 0});
+    }
+    buildSchedule(RN, LoopStack);
+  }
+}
+
+void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
+  if (RN->isSubRegion()) {
+    auto *LocalRegion = RN->getNodeAs<Region>();
+    if (!scop->isNonAffineSubRegion(LocalRegion)) {
+      buildSchedule(LocalRegion, LoopStack);
+      return;
+    }
+  }
+
+  assert(LoopStack.rbegin() != LoopStack.rend());
+  auto LoopData = LoopStack.rbegin();
+  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
+
+  for (auto *Stmt : scop->getStmtListFor(RN)) {
+    isl::union_set UDomain{Stmt->getDomain()};
+    auto StmtSchedule = isl::schedule::from_domain(UDomain);
+    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
+  }
+
+  // Check if we just processed the last node in this loop. If we did, finalize
+  // the loop by:
+  //
+  //   - adding new schedule dimensions
+  //   - folding the resulting schedule into the parent loop schedule
+  //   - dropping the loop schedule from the LoopStack.
+  //
+  // Then continue to check surrounding loops, which might also have been
+  // completed by this node.
+  size_t Dimension = LoopStack.size();
+  while (LoopData->L &&
+         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
+    isl::schedule Schedule = LoopData->Schedule;
+    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
+
+    assert(std::next(LoopData) != LoopStack.rend());
+    ++LoopData;
+    --Dimension;
+
+    if (Schedule) {
+      isl::union_set Domain = Schedule.get_domain();
+      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
+      Schedule = Schedule.insert_partial_schedule(MUPA);
+      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
+    }
+
+    LoopData->NumBlocksProcessed += NumBlocksProcessed;
+  }
+  // Now pop all loops processed up there from the LoopStack
+  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
+}
+
 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
   // Check for uses of this instruction outside the scop. Because we do not
   // iterate over such instructions and therefore did not "ensure" the existence
@@ -2554,7 +2728,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
     return;
   }
 
-  scop->buildSchedule(LI);
+  buildSchedule();
 
   finalizeAccesses();
 
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index c9a780ad987ca..199729e38e02d 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -163,18 +163,6 @@ static cl::opt<bool> PollyPrintInstructions(
 
 //===----------------------------------------------------------------------===//
 
-// Create a sequence of two schedules. Either argument may be null and is
-// interpreted as the empty schedule. Can also return null if both schedules are
-// empty.
-static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
-  if (!Prev)
-    return Succ;
-  if (!Succ)
-    return Prev;
-
-  return Prev.sequence(Succ);
-}
-
 static isl::set addRangeBoundsToSet(isl::set S, const ConstantRange &Range,
                                     int dim, isl::dim type) {
   isl::val V;
@@ -2125,72 +2113,6 @@ getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
   return TI->getSuccessor(idx);
 }
 
-/// Return the smallest loop surrounding @p RN.
-static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
-  if (!RN->isSubRegion()) {
-    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
-    Loop *L = LI.getLoopFor(BB);
-
-    // Unreachable statements are not considered to belong to a LLVM loop, as
-    // they are not part of an actual loop in the control flow graph.
-    // Nevertheless, we handle certain unreachable statements that are common
-    // when modeling run-time bounds checks as being part of the loop to be
-    // able to model them and to later eliminate the run-time bounds checks.
-    //
-    // Specifically, for basic blocks that terminate in an unreachable and
-    // where the immediate predecessor is part of a loop, we assume these
-    // basic blocks belong to the loop the predecessor belongs to. This
-    // allows us to model the following code.
-    //
-    // for (i = 0; i < N; i++) {
-    //   if (i > 1024)
-    //     abort();            <- this abort might be translated to an
-    //                            unreachable
-    //
-    //   A[i] = ...
-    // }
-    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
-      L = LI.getLoopFor(BB->getPrevNode());
-    return L;
-  }
-
-  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
-  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
-  while (L && NonAffineSubRegion->contains(L))
-    L = L->getParentLoop();
-  return L;
-}
-
-/// Get the number of blocks in @p L.
-///
-/// The number of blocks in a loop are the number of basic blocks actually
-/// belonging to the loop, as well as all single basic blocks that the loop
-/// exits to and which terminate in an unreachable instruction. We do not
-/// allow such basic blocks in the exit of a scop, hence they belong to the
-/// scop and represent run-time conditions which we want to model and
-/// subsequently speculate away.
-///
-/// @see getRegionNodeLoop for additional details.
-unsigned getNumBlocksInLoop(Loop *L) {
-  unsigned NumBlocks = L->getNumBlocks();
-  SmallVector<BasicBlock *, 4> ExitBlocks;
-  L->getExitBlocks(ExitBlocks);
-
-  for (auto ExitBlock : ExitBlocks) {
-    if (isa<UnreachableInst>(ExitBlock->getTerminator()))
-      NumBlocks++;
-  }
-  return NumBlocks;
-}
-
-static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
-  if (!RN->isSubRegion())
-    return 1;
-
-  Region *R = RN->getNodeAs<Region>();
-  return std::distance(R->block_begin(), R->block_end());
-}
-
 static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
                                const DominatorTree &DT) {
   if (!RN->isSubRegion())
@@ -2761,26 +2683,6 @@ bool Scop::addLoopBoundsToHeaderDomain(
   return true;
 }
 
-/// Get the smallest loop that contains @p S but is not in @p S.
-static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
-  // Start with the smallest loop containing the entry and expand that
-  // loop until it contains all blocks in the region. If there is a loop
-  // containing all blocks in the region check if it is itself contained
-  // and if so take the parent loop as it will be the smallest containing
-  // the region but not contained by it.
-  Loop *L = LI.getLoopFor(S.getEntry());
-  while (L) {
-    bool AllContained = true;
-    for (auto *BB : S.blocks())
-      AllContained &= L->contains(BB);
-    if (AllContained)
-      break;
-    L = L->getParentLoop();
-  }
-
-  return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
-}
-
 int Scop::NextScopID = 0;
 
 std::string Scop::CurrentFunc;
@@ -3463,40 +3365,6 @@ bool Scop::restrictDomains(isl::union_set Domain) {
 
 ScalarEvolution *Scop::getSE() const { return SE; }
 
-// Create an isl_multi_union_aff that defines an identity mapping from the
-// elements of USet to their N-th dimension.
-//
-// # Example:
-//
-//            Domain: { A[i,j]; B[i,j,k] }
-//                 N: 1
-//
-// Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
-//
-// @param USet   A union set describing the elements for which to generate a
-//               mapping.
-// @param N      The dimension to map to.
-// @returns      A mapping from USet to its N-th dimension.
-static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
-  assert(N >= 0);
-  assert(USet);
-  assert(!USet.is_empty());
-
-  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
-
-  for (isl::set S : USet.get_set_list()) {
-    int Dim = S.dim(isl::dim::set);
-    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
-                                                  N, Dim - N);
-    if (N > 1)
-      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
-
-    Result = Result.add_pw_multi_aff(PMA);
-  }
-
-  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
-}
-
 void Scop::addScopStmt(BasicBlock *BB, StringRef Name, Loop *SurroundingLoop,
                        std::vector<Instruction *> Instructions) {
   assert(BB && "Unexpected nullptr!");
@@ -3549,133 +3417,6 @@ ScopStmt *Scop::addScopStmt(isl::map SourceRel, isl::map TargetRel,
   return &(Stmts.back());
 }
 
-void Scop::buildSchedule(LoopInfo &LI) {
-  Loop *L = getLoopSurroundingScop(*this, LI);
-  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
-  buildSchedule(getRegion().getNode(), LoopStack, LI);
-  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
-  Schedule = LoopStack[0].Schedule;
-}
-
-/// To generate a schedule for the elements in a Region we traverse the Region
-/// in reverse-post-order and add the contained RegionNodes in traversal order
-/// to the schedule of the loop that is currently at the top of the LoopStack.
-/// For loop-free codes, this results in a correct sequential ordering.
-///
-/// Example:
-///           bb1(0)
-///         /     \.
-///      bb2(1)   bb3(2)
-///         \    /  \.
-///          bb4(3)  bb5(4)
-///             \   /
-///              bb6(5)
-///
-/// Including loops requires additional processing. Whenever a loop header is
-/// encountered, the corresponding loop is added to the @p LoopStack. Starting
-/// from an empty schedule, we first process all RegionNodes that are within
-/// this loop and complete the sequential schedule at this loop-level before
-/// processing about any other nodes. To implement this
-/// loop-nodes-first-processing, the reverse post-order traversal is
-/// insufficient. Hence, we additionally check if the traversal yields
-/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
-/// These region-nodes are then queue and only traverse after the all nodes
-/// within the current loop have been processed.
-void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
-  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);
-
-  ReversePostOrderTraversal<Region *> RTraversal(R);
-  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
-  std::deque<RegionNode *> DelayList;
-  bool LastRNWaiting = false;
-
-  // Iterate over the region @p R in reverse post-order but queue
-  // sub-regions/blocks iff they are not part of the last encountered but not
-  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
-  // that we queued the last sub-region/block from the reverse post-order
-  // iterator. If it is set we have to explore the next sub-region/block from
-  // the iterator (if any) to guarantee progress. If it is not set we first try
-  // the next queued sub-region/blocks.
-  while (!WorkList.empty() || !DelayList.empty()) {
-    RegionNode *RN;
-
-    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
-      RN = WorkList.front();
-      WorkList.pop_front();
-      LastRNWaiting = false;
-    } else {
-      RN = DelayList.front();
-      DelayList.pop_front();
-    }
-
-    Loop *L = getRegionNodeLoop(RN, LI);
-    if (!contains(L))
-      L = OuterScopLoop;
-
-    Loop *LastLoop = LoopStack.back().L;
-    if (LastLoop != L) {
-      if (LastLoop && !LastLoop->contains(L)) {
-        LastRNWaiting = true;
-        DelayList.push_back(RN);
-        continue;
-      }
-      LoopStack.push_back({L, nullptr, 0});
-    }
-    buildSchedule(RN, LoopStack, LI);
-  }
-}
-
-void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {
-  if (RN->isSubRegion()) {
-    auto *LocalRegion = RN->getNodeAs<Region>();
-    if (!isNonAffineSubRegion(LocalRegion)) {
-      buildSchedule(LocalRegion, LoopStack, LI);
-      return;
-    }
-  }
-
-  assert(LoopStack.rbegin() != LoopStack.rend());
-  auto LoopData = LoopStack.rbegin();
-  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
-
-  for (auto *Stmt : getStmtListFor(RN)) {
-    isl::union_set UDomain{Stmt->getDomain()};
-    auto StmtSchedule = isl::schedule::from_domain(UDomain);
-    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
-  }
-
-  // Check if we just processed the last node in this loop. If we did, finalize
-  // the loop by:
-  //
-  //   - adding new schedule dimensions
-  //   - folding the resulting schedule into the parent loop schedule
-  //   - dropping the loop schedule from the LoopStack.
-  //
-  // Then continue to check surrounding loops, which might also have been
-  // completed by this node.
-  size_t Dimension = LoopStack.size();
-  while (LoopData->L &&
-         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
-    isl::schedule Schedule = LoopData->Schedule;
-    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
-
-    assert(std::next(LoopData) != LoopStack.rend());
-    ++LoopData;
-    --Dimension;
-
-    if (Schedule) {
-      isl::union_set Domain = Schedule.get_domain();
-      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
-      Schedule = Schedule.insert_partial_schedule(MUPA);
-      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
-    }
-
-    LoopData->NumBlocksProcessed += NumBlocksProcessed;
-  }
-  // Now pop all loops processed up there from the LoopStack
-  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
-}
-
 ArrayRef<ScopStmt *> Scop::getStmtListFor(BasicBlock *BB) const {
   auto StmtMapIt = StmtMap.find(BB);
   if (StmtMapIt == StmtMap.end())
diff --git a/polly/lib/Support/ScopHelper.cpp b/polly/lib/Support/ScopHelper.cpp
index e65c47298ed88..5f80afd082113 100644
--- a/polly/lib/Support/ScopHelper.cpp
+++ b/polly/lib/Support/ScopHelper.cpp
@@ -461,6 +461,80 @@ Value *polly::getConditionFromTerminator(Instruction *TI) {
   return nullptr;
 }
 
+Loop *polly::getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
+  // Start with the smallest loop containing the entry and expand that
+  // loop until it contains all blocks in the region. If there is a loop
+  // containing all blocks in the region check if it is itself contained
+  // and if so take the parent loop as it will be the smallest containing
+  // the region but not contained by it.
+  Loop *L = LI.getLoopFor(S.getEntry());
+  while (L) {
+    bool AllContained = true;
+    for (auto *BB : S.blocks())
+      AllContained &= L->contains(BB);
+    if (AllContained)
+      break;
+    L = L->getParentLoop();
+  }
+
+  return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
+}
+
+unsigned polly::getNumBlocksInLoop(Loop *L) {
+  unsigned NumBlocks = L->getNumBlocks();
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+
+  for (auto ExitBlock : ExitBlocks) {
+    if (isa<UnreachableInst>(ExitBlock->getTerminator()))
+      NumBlocks++;
+  }
+  return NumBlocks;
+}
+
+unsigned polly::getNumBlocksInRegionNode(RegionNode *RN) {
+  if (!RN->isSubRegion())
+    return 1;
+
+  Region *R = RN->getNodeAs<Region>();
+  return std::distance(R->block_begin(), R->block_end());
+}
+
+Loop *polly::getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
+  if (!RN->isSubRegion()) {
+    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
+    Loop *L = LI.getLoopFor(BB);
+
+    // Unreachable statements are not considered to belong to a LLVM loop, as
+    // they are not part of an actual loop in the control flow graph.
+    // Nevertheless, we handle certain unreachable statements that are common
+    // when modeling run-time bounds checks as being part of the loop to be
+    // able to model them and to later eliminate the run-time bounds checks.
+    //
+    // Specifically, for basic blocks that terminate in an unreachable and
+    // where the immediate predecessor is part of a loop, we assume these
+    // basic blocks belong to the loop the predecessor belongs to. This
+    // allows us to model the following code.
+    //
+    // for (i = 0; i < N; i++) {
+    //   if (i > 1024)
+    //     abort();            <- this abort might be translated to an
+    //                            unreachable
+    //
+    //   A[i] = ...
+    // }
+    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
+      L = LI.getLoopFor(BB->getPrevNode());
+    return L;
+  }
+
+  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
+  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
+  while (L && NonAffineSubRegion->contains(L))
+    L = L->getParentLoop();
+  return L;
+}
+
 static bool hasVariantIndex(GetElementPtrInst *Gep, Loop *L, Region &R,
                             ScalarEvolution &SE) {
   for (const Use &Val : llvm::drop_begin(Gep->operands(), 1)) {

From be4be6120f10f557d32de5e33ff8233e17fe61de Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda@apple.com>
Date: Wed, 17 Jul 2019 21:44:05 +0000
Subject: [PATCH 396/451] Add support to ProcessMachCore::DoLoadCore to handle
 an EFI UUID str.

A core file may have an EFI version string which includes a UUID
(similar to what is returned for the kdp KDP_KERNELVERSION packet)
in its LC_IDENT or LC_NOTE 'kern ver str' load command.  In that
case, we should try to find the binary and dSYM for the UUID
listed.  The dSYM may have python code which knows how to relocate
the binary to the correct address in lldb's target section load
list and load other ancillary binaries.

The test case is a little involved,

1. it compiles an inferior hello world app (a.out),
2. it compiles a program which can create a corefile manually
   with a specific binary's UUID encoded in it,
3. it gets the UUID of the a.out binary,
4. it creates a shell script, dsym-for-uuid.sh, which will
   return the full path to the a.out + a.out.dSYM when called
   with the correct UUID,
5. it sets the LLDB_APPLE_DSYMFORUUID_EXECUTABLE env var before
   creating the lldb target, to point to this dsym-for-uuid.sh,
6. runs the create-corefile binary we compiled in step #2,
7. loads the corefile from step #6 into lldb,
8. verifies that lldb loaded a.out by reading the LC_NOTE
   load command from the corefile, calling dsym-for-uuid.sh with
   that UUID, got back the path to a.out and loaded it.

whew!

<rdar://problem/47562911>

llvm-svn: 366378
---
 .../test/macosx/lc-note/kern-ver-str/Makefile |  15 +
 .../kern-ver-str/TestKernVerStrLCNOTE.py      | 102 ++++++
 .../kern-ver-str/create-empty-corefile.cpp    | 315 ++++++++++++++++++
 .../kern-ver-str/create-empty-corefile.mk     |   8 +
 .../test/macosx/lc-note/kern-ver-str/main.c   |   2 +
 .../Process/mach-core/ProcessMachCore.cpp     |  55 +++
 6 files changed, 497 insertions(+)
 create mode 100644 lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/Makefile
 create mode 100644 lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py
 create mode 100644 lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/create-empty-corefile.cpp
 create mode 100644 lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/create-empty-corefile.mk
 create mode 100644 lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/main.c

diff --git a/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/Makefile b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/Makefile
new file mode 100644
index 0000000000000..b440cf3218912
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/Makefile
@@ -0,0 +1,15 @@
+LEVEL = ../../../make
+
+MAKE_DSYM := NO
+
+C_SOURCES := main.c
+
+all: a.out create-empty-corefile
+
+create-empty-corefile:
+	$(MAKE) VPATH=$(VPATH) -f $(SRCDIR)/create-empty-corefile.mk
+
+clean::
+	$(MAKE) -f create-empty-corefile.mk clean
+
+include $(LEVEL)/Makefile.rules
diff --git a/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py
new file mode 100644
index 0000000000000..2acde16cbfa63
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py
@@ -0,0 +1,102 @@
+"""Test that a corefile's LC_NOTE "kern ver str" load command is used."""
+
+from __future__ import print_function
+
+
+import os
+import re
+import subprocess
+import sys
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestKernVerStrLCNOTE(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM")
+    @skipIfDarwinEmbedded
+    @skipUnlessDarwin
+    def test_lc_note(self):
+        self.build()
+        self.test_exe = self.getBuildArtifact("a.out")
+        self.create_corefile = self.getBuildArtifact("create-empty-corefile")
+        self.dsym_for_uuid = self.getBuildArtifact("dsym-for-uuid.sh")
+        self.corefile = self.getBuildArtifact("core")
+
+        ## We can hook in our dsym-for-uuid shell script to lldb with this env
+        ## var instead of requiring a defaults write.
+        os.environ['LLDB_APPLE_DSYMFORUUID_EXECUTABLE'] = self.dsym_for_uuid
+        self.addTearDownHook(lambda: os.environ.pop('LLDB_APPLE_DSYMFORUUID_EXECUTABLE', None))
+
+        dwarfdump_uuid_regex = re.compile(
+            'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')
+        dwarfdump_cmd_output = subprocess.check_output(
+                ('/usr/bin/dwarfdump --uuid "%s"' % self.test_exe), shell=True).decode("utf-8")
+        aout_uuid = None
+        for line in dwarfdump_cmd_output.splitlines():
+            match = dwarfdump_uuid_regex.search(line)
+            if match:
+                aout_uuid = match.group(1)
+        self.assertNotEqual(aout_uuid, None, "Could not get uuid of built a.out")
+
+        ###  Create our dsym-for-uuid shell script which returns self.test_exe
+        ###  and its dSYM when given self.test_exe's UUID.
+        shell_cmds = [
+                '#! /bin/sh',
+                'ret=0',
+                'echo "<?xml version=\\"1.0\\" encoding=\\"UTF-8\\"?>"',
+                'echo "<!DOCTYPE plist PUBLIC \\"-//Apple//DTD PLIST 1.0//EN\\" \\"http://www.apple.com/DTDs/PropertyList-1.0.dtd\\">"',
+                'date >> /tmp/log',
+                'echo "<plist version=\\"1.0\\">"',
+                '',
+                '# the last arugment is probably the uuid',
+                'while [ $# -gt 1 ]',
+                'do',
+                '  shift',
+                'done',
+                'echo "<dict><key>$1</key><dict>"',
+                '',
+                'if [ "$1" = "%s" ]' % aout_uuid,
+                'then',
+                '  echo "<key>DBGArchitecture</key><string>x86_64</string>"',
+                '  echo "<key>DBGDSYMPath</key><string>%s.dSYM/Contents/Resources/DWARF/%s</string>"' % (self.test_exe, os.path.basename(self.test_exe)),
+                '  echo "<key>DBGSymbolRichExecutable</key><string>%s</string>"' % self.test_exe,
+                'else',
+                '  echo "<key>DBGError</key><string>not found</string>"',
+                '  ret=1',
+                'fi',
+                'echo "</dict></dict></plist>"',
+                'exit $ret'
+                ]
+
+        with open(self.dsym_for_uuid, "w") as writer:
+            for l in shell_cmds:
+                writer.write(l + '\n')
+
+        os.chmod(self.dsym_for_uuid, 0o755)
+
+        ### Create our corefile
+        retcode = call(self.create_corefile + " " +  self.corefile + " " + self.test_exe, shell=True)
+
+        ### Now run lldb on the corefile
+        ### which will give us a UUID
+        ### which we call dsym-for-uuid.sh with
+        ### which gives us a binary and dSYM
+        ### which lldb should load!
+
+
+        self.target = self.dbg.CreateTarget('')
+        err = lldb.SBError()
+        self.process = self.target.LoadCore(self.corefile)
+        self.assertEqual(self.process.IsValid(), True)
+        if self.TraceOn():
+            self.runCmd("image list")
+        self.assertEqual(self.target.GetNumModules(), 1)
+        fspec = self.target.GetModuleAtIndex(0).GetFileSpec()
+        filepath = fspec.GetDirectory() + "/" + fspec.GetFilename()
+        self.assertEqual(filepath, self.test_exe)
diff --git a/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/create-empty-corefile.cpp b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/create-empty-corefile.cpp
new file mode 100644
index 0000000000000..8a8115af96728
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/create-empty-corefile.cpp
@@ -0,0 +1,315 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <mach-o/loader.h>
+#include <vector>
+#include <string>
+#include <mach/thread_status.h>
+#include <string.h>
+#include <uuid/uuid.h>
+
+// Create an empty corefile with a "kern ver str" LC_NOTE.
+// If an existing binary is given as an optional 2nd argument on the cmd line,
+// the UUID from that binary will be encoded in the corefile.
+// Otherwise a pre-set UUID will be put in the corefile that
+// is created.
+
+
+union uint32_buf {
+    uint8_t bytebuf[4];
+    uint32_t val;
+};
+
+union uint64_buf {
+    uint8_t bytebuf[8];
+    uint64_t val;
+};
+
+void
+add_uint64(std::vector<uint8_t> &buf, uint64_t val)
+{
+    uint64_buf conv;
+    conv.val = val;
+    for (int i = 0; i < 8; i++)
+        buf.push_back(conv.bytebuf[i]);
+}
+
+void
+add_uint32(std::vector<uint8_t> &buf, uint32_t val)
+{
+    uint32_buf conv;
+    conv.val = val;
+    for (int i = 0; i < 4; i++)
+        buf.push_back(conv.bytebuf[i]);
+}
+
+std::vector<uint8_t>
+x86_lc_thread_load_command ()
+{
+    std::vector<uint8_t> data;
+    add_uint32 (data, LC_THREAD);                // thread_command.cmd
+    add_uint32 (data, 184);                      // thread_command.cmdsize
+    add_uint32 (data, x86_THREAD_STATE64);       // thread_command.flavor
+    add_uint32 (data, x86_THREAD_STATE64_COUNT); // thread_command.count
+    add_uint64 (data, 0x0000000000000000);       // rax
+    add_uint64 (data, 0x0000000000000400);       // rbx
+    add_uint64 (data, 0x0000000000000000);       // rcx
+    add_uint64 (data, 0x0000000000000000);       // rdx
+    add_uint64 (data, 0x0000000000000000);       // rdi
+    add_uint64 (data, 0x0000000000000000);       // rsi
+    add_uint64 (data, 0xffffff9246e2ba20);       // rbp
+    add_uint64 (data, 0xffffff9246e2ba10);       // rsp
+    add_uint64 (data, 0x0000000000000000);       // r8 
+    add_uint64 (data, 0x0000000000000000);       // r9 
+    add_uint64 (data, 0x0000000000000000);       // r10
+    add_uint64 (data, 0x0000000000000000);       // r11
+    add_uint64 (data, 0xffffff7f96ce5fe1);       // r12
+    add_uint64 (data, 0x0000000000000000);       // r13
+    add_uint64 (data, 0x0000000000000000);       // r14
+    add_uint64 (data, 0xffffff9246e2bac0);       // r15
+    add_uint64 (data, 0xffffff8015a8f6d0);       // rip
+    add_uint64 (data, 0x0000000000011111);       // rflags
+    add_uint64 (data, 0x0000000000022222);       // cs
+    add_uint64 (data, 0x0000000000033333);       // fs
+    add_uint64 (data, 0x0000000000044444);       // gs
+    return data;
+}
+
+void
+add_lc_note_kern_ver_str_load_command (std::vector<std::vector<uint8_t> > &loadcmds, 
+                                       std::vector<uint8_t> &payload,
+                                       int payload_file_offset,
+                                       std::string ident)
+{
+    std::vector<uint8_t> loadcmd_data;
+
+    add_uint32 (loadcmd_data, LC_NOTE);          // note_command.cmd
+    add_uint32 (loadcmd_data, 40);               // note_command.cmdsize
+    char lc_note_name[16];
+    memset (lc_note_name, 0, 16);
+    strcpy (lc_note_name, "kern ver str");
+    
+    // lc_note.data_owner
+    for (int i = 0; i < 16; i++)
+        loadcmd_data.push_back (lc_note_name[i]);
+
+    // we start writing the payload at payload_file_offset to leave
+    // room at the start for the header & the load commands.
+    uint64_t current_payload_offset = payload.size() + payload_file_offset;
+
+    add_uint64 (loadcmd_data, current_payload_offset);   // note_command.offset
+    add_uint64 (loadcmd_data, 4 + ident.size() + 1);       // note_command.size
+
+    loadcmds.push_back (loadcmd_data);
+
+    add_uint32 (payload, 1);                 // kerneL_version_string.version
+    for (int i = 0; i < ident.size() + 1; i++)
+    {
+        payload.push_back (ident[i]);
+    }
+}
+
+void
+add_lc_segment (std::vector<std::vector<uint8_t> > &loadcmds,
+                std::vector<uint8_t> &payload,
+                int payload_file_offset)
+{
+    std::vector<uint8_t> loadcmd_data;
+    struct segment_command_64 seg;
+    seg.cmd = LC_SEGMENT_64;
+    seg.cmdsize = sizeof (struct segment_command_64);  // no sections
+    memset (seg.segname, 0, 16);
+    seg.vmaddr = 0xffffff7f96400000;
+    seg.vmsize = 4096;
+    seg.fileoff = payload.size() + payload_file_offset;
+    seg.filesize = 0;
+    seg.maxprot = 1;
+    seg.initprot = 1;
+    seg.nsects = 0;
+    seg.flags = 0;
+
+    uint8_t *p = (uint8_t*) &seg;
+    for (int i = 0; i < sizeof (struct segment_command_64); i++)
+    {
+        loadcmd_data.push_back (*(p + i));
+    }
+    loadcmds.push_back (loadcmd_data);
+}
+
+std::string
+get_uuid_from_binary (const char *fn)
+{
+    FILE *f = fopen(fn, "r");
+    if (f == nullptr)
+    {
+        fprintf (stderr, "Unable to open binary '%s' to get uuid\n", fn);
+        exit(1);
+    }
+		uint32_t num_of_load_cmds = 0;
+		uint32_t size_of_load_cmds = 0;
+		std::string uuid;
+    off_t file_offset = 0;
+
+    uint8_t magic[4];
+    if (::fread (magic, 1, 4, f) != 4)
+    {
+        fprintf (stderr, "Failed to read magic number from input file %s\n", fn);
+        exit (1);
+    }
+    uint8_t magic_32_be[] = {0xfe, 0xed, 0xfa, 0xce};
+    uint8_t magic_32_le[] = {0xce, 0xfa, 0xed, 0xfe};
+    uint8_t magic_64_be[] = {0xfe, 0xed, 0xfa, 0xcf};
+    uint8_t magic_64_le[] = {0xcf, 0xfa, 0xed, 0xfe};
+
+    if (memcmp (magic, magic_32_be, 4) == 0 || memcmp (magic, magic_64_be, 4) == 0)
+    {
+        fprintf (stderr, "big endian corefiles not supported\n");
+        exit (1);
+    }
+
+    ::fseeko (f, 0, SEEK_SET);
+    if (memcmp (magic, magic_32_le, 4) == 0)
+    {
+        struct mach_header mh;
+        if (::fread (&mh, 1, sizeof (mh), f) != sizeof (mh))
+        {
+            fprintf (stderr, "error reading mach header from input file\n");
+            exit (1);
+        }
+        if (mh.cputype != CPU_TYPE_X86_64)
+        {
+            fprintf (stderr, "This tool creates an x86_64 corefile but "
+                     "the supplied binary '%s' is cputype 0x%x\n",
+                     fn, (uint32_t) mh.cputype);
+            exit (1);
+        }
+				num_of_load_cmds = mh.ncmds;
+				size_of_load_cmds = mh.sizeofcmds;
+        file_offset += sizeof (struct mach_header);
+    }
+    else
+    {
+        struct mach_header_64 mh;
+        if (::fread (&mh, 1, sizeof (mh), f) != sizeof (mh))
+        {
+            fprintf (stderr, "error reading mach header from input file\n");
+            exit (1);
+        }
+        if (mh.cputype != CPU_TYPE_X86_64)
+        {
+            fprintf (stderr, "This tool creates an x86_64 corefile but "
+                     "the supplied binary '%s' is cputype 0x%x\n",
+                     fn, (uint32_t) mh.cputype);
+            exit (1);
+        }
+				num_of_load_cmds = mh.ncmds;
+				size_of_load_cmds = mh.sizeofcmds;
+        file_offset += sizeof (struct mach_header_64);
+    }
+
+    off_t load_cmds_offset = file_offset;
+
+    for (int i = 0; i < num_of_load_cmds && (file_offset - load_cmds_offset) < size_of_load_cmds; i++)
+    {
+        ::fseeko (f, file_offset, SEEK_SET);
+        uint32_t cmd;
+        uint32_t cmdsize;
+        ::fread (&cmd, sizeof (uint32_t), 1, f);
+        ::fread (&cmdsize, sizeof (uint32_t), 1, f);
+        if (cmd == LC_UUID)
+        {
+            struct uuid_command uuidcmd;
+            ::fseeko (f, file_offset, SEEK_SET);
+            if (::fread (&uuidcmd, 1, sizeof (uuidcmd), f) != sizeof (uuidcmd))
+            {
+                fprintf (stderr, "Unable to read LC_UUID load command.\n");
+                exit (1);
+            }
+            uuid_string_t uuidstr;
+            uuid_unparse (uuidcmd.uuid, uuidstr);
+            uuid = uuidstr;
+            break;
+        }
+        file_offset += cmdsize;
+    }
+    return uuid;
+}
+
+int main (int argc, char **argv)
+{
+    if (argc != 2 && argc != 3)
+    {
+        fprintf (stderr, "usage: create-empty-corefile <output-core-name> [binary-to-copy-uuid-from]\n");
+        fprintf (stderr, "Create a Mach-O corefile with an LC_NOTE 'kern ver str' load command/payload\n");
+        fprintf (stderr, "If a binary is given as a second argument, the Mach-O UUID of that file will\n");
+        fprintf (stderr, "be read and used in the corefile's LC_NOTE payload.\n");
+        exit (1);
+    }
+
+    std::string ident = "EFI UUID=3F9BA21F-55EA-356A-A349-BBA6F51FE8B1";
+    if (argc == 3)
+    {
+        std::string uuid_from_file = get_uuid_from_binary (argv[2]);
+        if (!uuid_from_file.empty())
+        {
+            ident = "EFI UUID=";
+            ident += uuid_from_file;
+        }
+    }
+
+    // An array of load commands (in the form of byte arrays)
+    std::vector<std::vector<uint8_t> > load_commands;
+
+    // An array of corefile contents (page data, lc_note data, etc)
+    std::vector<uint8_t> payload;
+
+    // First add all the load commands / payload so we can figure out how large
+    // the load commands will actually be.
+    load_commands.push_back (x86_lc_thread_load_command());
+    add_lc_note_kern_ver_str_load_command (load_commands, payload, 0, ident);
+    add_lc_segment (load_commands, payload, 0);
+
+    int size_of_load_commands = 0;
+    for (const auto &lc : load_commands)
+        size_of_load_commands += lc.size();
+
+    int header_and_load_cmd_room = sizeof (struct mach_header_64) + size_of_load_commands;
+
+    // Erase the load commands / payload now that we know how much space is needed,
+    // and redo it.
+    load_commands.clear();
+    payload.clear();
+
+    load_commands.push_back (x86_lc_thread_load_command());
+    add_lc_note_kern_ver_str_load_command (load_commands, payload, header_and_load_cmd_room, ident);
+    add_lc_segment (load_commands, payload, header_and_load_cmd_room);
+
+    struct mach_header_64 mh;
+    mh.magic = MH_MAGIC_64;
+    mh.cputype = CPU_TYPE_X86_64;
+    mh.cpusubtype = CPU_SUBTYPE_X86_64_ALL;
+    mh.filetype = MH_CORE;
+    mh.ncmds = load_commands.size();
+    mh.sizeofcmds = size_of_load_commands;
+    mh.flags = 0;
+    mh.reserved = 0;
+
+
+    FILE *f = fopen (argv[1], "w");
+
+    if (f == nullptr)
+    {
+        fprintf (stderr, "Unable to open file %s for writing\n", argv[1]);
+        exit (1);
+    }
+
+    fwrite (&mh, sizeof (struct mach_header_64), 1, f);
+
+    for (const auto &lc : load_commands)
+        fwrite (lc.data(), lc.size(), 1, f);
+
+    fseek (f, header_and_load_cmd_room, SEEK_SET);
+
+    fwrite (payload.data(), payload.size(), 1, f);
+
+    fclose (f);
+}
diff --git a/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/create-empty-corefile.mk b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/create-empty-corefile.mk
new file mode 100644
index 0000000000000..4d3e320f0415c
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/create-empty-corefile.mk
@@ -0,0 +1,8 @@
+LEVEL = ../../../make
+
+MAKE_DSYM := NO
+
+CXX_SOURCES := create-empty-corefile.cpp
+EXE = create-empty-corefile
+
+include $(LEVEL)/Makefile.rules
diff --git a/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/main.c b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/main.c
new file mode 100644
index 0000000000000..70a72e0b80b1e
--- /dev/null
+++ b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/main.c
@@ -0,0 +1,2 @@
+#include <stdio.h>
+int main () { puts ("this is the lc-note test program."); }
diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
index cdb7aa006242c..11b9e4588a817 100644
--- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
+++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
@@ -19,6 +19,7 @@
 #include "lldb/Core/PluginManager.h"
 #include "lldb/Core/Section.h"
 #include "lldb/Host/Host.h"
+#include "lldb/Symbol/LocateSymbolFile.h"
 #include "lldb/Symbol/ObjectFile.h"
 #include "lldb/Target/MemoryRegionInfo.h"
 #include "lldb/Target/Target.h"
@@ -323,6 +324,60 @@ Status ProcessMachCore::DoLoadCore() {
             addr, corefile_identifier.c_str());
     }
   }
+  if (found_main_binary_definitively == false 
+      && corefile_identifier.find("EFI ") != std::string::npos) {
+      UUID uuid;
+      if (corefile_identifier.find("UUID=") != std::string::npos) {
+          size_t p = corefile_identifier.find("UUID=") + strlen("UUID=");
+          std::string uuid_str = corefile_identifier.substr(p, 36);
+          uuid.SetFromStringRef(uuid_str);
+      }
+      if (uuid.IsValid()) {
+        if (log)
+          log->Printf("ProcessMachCore::DoLoadCore: Using the EFI "
+                      "from LC_IDENT/LC_NOTE 'kern ver str' string: '%s'", 
+                      corefile_identifier.c_str());
+
+          // We're only given a UUID here, not a load address.
+          // But there are python scripts in the EFI binary's dSYM which
+          // know how to relocate the binary to the correct load address.
+          // lldb only needs to locate & load the binary + dSYM.
+          ModuleSpec module_spec;
+          module_spec.GetUUID() = uuid;
+          module_spec.GetArchitecture() = GetTarget().GetArchitecture();
+
+          // Lookup UUID locally, before attempting dsymForUUID like action
+          FileSpecList search_paths =
+              Target::GetDefaultDebugFileSearchPaths();
+          module_spec.GetSymbolFileSpec() =
+              Symbols::LocateExecutableSymbolFile(module_spec, search_paths);
+          if (module_spec.GetSymbolFileSpec()) {
+            ModuleSpec executable_module_spec =
+                Symbols::LocateExecutableObjectFile(module_spec);
+            if (FileSystem::Instance().Exists(executable_module_spec.GetFileSpec())) {
+              module_spec.GetFileSpec() =
+                  executable_module_spec.GetFileSpec();
+            }
+          }
+
+          // Force a dsymForUUID lookup, if that tool is available.
+          if (!module_spec.GetSymbolFileSpec())
+            Symbols::DownloadObjectAndSymbolFile(module_spec, true);
+
+          if (FileSystem::Instance().Exists(module_spec.GetFileSpec())) {
+            ModuleSP module_sp(new Module(module_spec));
+            if (module_sp.get() && module_sp->GetObjectFile()) {
+              // Get the current target executable
+              ModuleSP exe_module_sp(GetTarget().GetExecutableModule());
+
+              // Make sure you don't already have the right module loaded
+              // and they will be uniqued
+              if (exe_module_sp.get() != module_sp.get())
+                GetTarget().SetExecutableModule(module_sp, eLoadDependentsNo);
+            }
+          }
+      }
+  }
 
   if (!found_main_binary_definitively &&
       (m_dyld_addr == LLDB_INVALID_ADDRESS ||

From 3b82b92c6b90691064c7531ecc03366cf7526d8a Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Wed, 17 Jul 2019 21:45:19 +0000
Subject: [PATCH 397/451] hwasan: Initialize the pass only once.

This will let us instrument globals during initialization. This required
making the new PM pass a module pass, which should still provide access to
analyses via the ModuleAnalysisManager.

Differential Revision: https://reviews.llvm.org/D64843

llvm-svn: 366379
---
 clang/lib/CodeGen/BackendUtil.cpp             | 38 +++++--------------
 .../Instrumentation/HWAddressSanitizer.h      |  2 +-
 llvm/lib/Passes/PassRegistry.def              |  4 +-
 .../Instrumentation/HWAddressSanitizer.cpp    | 25 +++++++++---
 .../HWAddressSanitizer/basic.ll               |  8 ++--
 5 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 40a529c319f4a..497652e85b47a 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -967,17 +967,6 @@ static void addSanitizersAtO0(ModulePassManager &MPM,
   if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
     MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
   }
-
-  if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
-    bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
-    MPM.addPass(createModuleToFunctionPassAdaptor(
-        HWAddressSanitizerPass(/*CompileKernel=*/false, Recover)));
-  }
-
-  if (LangOpts.Sanitize.has(SanitizerKind::KernelHWAddress)) {
-    MPM.addPass(createModuleToFunctionPassAdaptor(
-        HWAddressSanitizerPass(/*CompileKernel=*/true, /*Recover=*/true)));
-  }
 }
 
 /// A clean version of `EmitAssembly` that uses the new pass manager.
@@ -1176,23 +1165,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
                   UseOdrIndicator));
             });
       }
-      if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
-        bool Recover =
-            CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
-        PB.registerOptimizerLastEPCallback(
-            [Recover](FunctionPassManager &FPM,
-                      PassBuilder::OptimizationLevel Level) {
-              FPM.addPass(HWAddressSanitizerPass(
-                  /*CompileKernel=*/false, Recover));
-            });
-      }
-      if (LangOpts.Sanitize.has(SanitizerKind::KernelHWAddress)) {
-        PB.registerOptimizerLastEPCallback(
-            [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
-              FPM.addPass(HWAddressSanitizerPass(
-                  /*CompileKernel=*/true, /*Recover=*/true));
-            });
-      }
       if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts))
         PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) {
           MPM.addPass(GCOVProfilerPass(*Options));
@@ -1219,6 +1191,16 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
       }
     }
 
+    if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
+      bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
+      MPM.addPass(HWAddressSanitizerPass(
+          /*CompileKernel=*/false, Recover));
+    }
+    if (LangOpts.Sanitize.has(SanitizerKind::KernelHWAddress)) {
+      MPM.addPass(HWAddressSanitizerPass(
+          /*CompileKernel=*/true, /*Recover=*/true));
+    }
+
     if (CodeGenOpts.OptimizationLevel == 0)
       addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts);
   }
diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
index e34cf6c50d1ab..e3104eeb1d360 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -26,7 +26,7 @@ class HWAddressSanitizerPass : public PassInfoMixin<HWAddressSanitizerPass> {
 public:
   explicit HWAddressSanitizerPass(bool CompileKernel = false,
                                   bool Recover = false);
-  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
 
 private:
   bool CompileKernel;
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index e785558d5a732..347f75870eb3c 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -55,6 +55,8 @@ MODULE_PASS("globaldce", GlobalDCEPass())
 MODULE_PASS("globalopt", GlobalOptPass())
 MODULE_PASS("globalsplit", GlobalSplitPass())
 MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
+MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
+MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
 MODULE_PASS("inferattrs", InferFunctionAttrsPass())
 MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
 MODULE_PASS("instrorderfile", InstrOrderFilePass())
@@ -240,8 +242,6 @@ FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
 FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
 FUNCTION_PASS("asan", AddressSanitizerPass(false, false, false))
 FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false))
-FUNCTION_PASS("hwasan", HWAddressSanitizerPass(false, false))
-FUNCTION_PASS("khwasan", HWAddressSanitizerPass(true, true))
 FUNCTION_PASS("msan", MemorySanitizerPass({}))
 FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
 FUNCTION_PASS("tsan", ThreadSanitizerPass())
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 22e8b4ee2e298..90a9f4955a4b4 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -277,12 +277,22 @@ class HWAddressSanitizerLegacyPass : public FunctionPass {
 
   StringRef getPassName() const override { return "HWAddressSanitizer"; }
 
+  bool doInitialization(Module &M) override {
+    HWASan = llvm::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover);
+    return true;
+  }
+
   bool runOnFunction(Function &F) override {
-    HWAddressSanitizer HWASan(*F.getParent(), CompileKernel, Recover);
-    return HWASan.sanitizeFunction(F);
+    return HWASan->sanitizeFunction(F);
+  }
+
+  bool doFinalization(Module &M) override {
+    HWASan.reset();
+    return false;
   }
 
 private:
+  std::unique_ptr<HWAddressSanitizer> HWASan;
   bool CompileKernel;
   bool Recover;
 };
@@ -309,10 +319,13 @@ FunctionPass *llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel,
 HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover)
     : CompileKernel(CompileKernel), Recover(Recover) {}
 
-PreservedAnalyses HWAddressSanitizerPass::run(Function &F,
-                                              FunctionAnalysisManager &FAM) {
-  HWAddressSanitizer HWASan(*F.getParent(), CompileKernel, Recover);
-  if (HWASan.sanitizeFunction(F))
+PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
+                                              ModuleAnalysisManager &MAM) {
+  HWAddressSanitizer HWASan(M, CompileKernel, Recover);
+  bool Modified = false;
+  for (Function &F : M)
+    Modified |= HWASan.sanitizeFunction(F);
+  if (Modified)
     return PreservedAnalyses::none();
   return PreservedAnalyses::all();
 }
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/basic.ll b/llvm/test/Instrumentation/HWAddressSanitizer/basic.ll
index 670224b27ee2b..79e414a5e4ad3 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/basic.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/basic.ll
@@ -6,10 +6,10 @@
 ; RUN: opt < %s -hwasan -hwasan-recover=1 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,RECOVER-ZERO-BASED-SHADOW
 
 ; Ensure than hwasan runs with the new PM pass
-; RUN: opt < %s -passes='function(hwasan)' -hwasan-recover=0 -hwasan-with-ifunc=1 -hwasan-with-tls=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,ABORT-DYNAMIC-SHADOW
-; RUN: opt < %s -passes='function(hwasan)' -hwasan-recover=1 -hwasan-with-ifunc=1 -hwasan-with-tls=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,RECOVER-DYNAMIC-SHADOW
-; RUN: opt < %s -passes='function(hwasan)' -hwasan-recover=0 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,ABORT-ZERO-BASED-SHADOW
-; RUN: opt < %s -passes='function(hwasan)' -hwasan-recover=1 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,RECOVER-ZERO-BASED-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-recover=0 -hwasan-with-ifunc=1 -hwasan-with-tls=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,ABORT-DYNAMIC-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-recover=1 -hwasan-with-ifunc=1 -hwasan-with-tls=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,RECOVER-DYNAMIC-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-recover=0 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,ABORT-ZERO-BASED-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-recover=1 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,RECOVER-ZERO-BASED-SHADOW
 
 ; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @hwasan.module_ctor, i8* bitcast (void ()* @hwasan.module_ctor to i8*) }]
 

From 0dd40a7d9f2f66ba749e9ba3dcd1fc4a5145a827 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Wed, 17 Jul 2019 21:45:34 +0000
Subject: [PATCH 398/451] gn build: Merge r366361.

llvm-svn: 366380
---
 llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
index 6c683d2a0243e..02e02eedcc260 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn
@@ -103,6 +103,7 @@ static_library("LLVMAArch64CodeGen") {
     "AArch64SIMDInstrOpt.cpp",
     "AArch64SelectionDAGInfo.cpp",
     "AArch64SpeculationHardening.cpp",
+    "AArch64StackTagging.cpp",
     "AArch64StorePairSuppress.cpp",
     "AArch64Subtarget.cpp",
     "AArch64TargetMachine.cpp",

From 1375659e0f0209754bb1401b60e0c4a5b4c77067 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda@apple.com>
Date: Wed, 17 Jul 2019 21:55:39 +0000
Subject: [PATCH 399/451] Ah, forgot a debug line I left in the
 dsym-for-uuid.sh script to make sure it was correctly being disabled after
 this test case completed.

llvm-svn: 366381
---
 .../test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py     | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py
index 2acde16cbfa63..2b2e4fd9ce4d2 100644
--- a/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py
+++ b/lldb/packages/Python/lldbsuite/test/macosx/lc-note/kern-ver-str/TestKernVerStrLCNOTE.py
@@ -51,7 +51,6 @@ def test_lc_note(self):
                 'ret=0',
                 'echo "<?xml version=\\"1.0\\" encoding=\\"UTF-8\\"?>"',
                 'echo "<!DOCTYPE plist PUBLIC \\"-//Apple//DTD PLIST 1.0//EN\\" \\"http://www.apple.com/DTDs/PropertyList-1.0.dtd\\">"',
-                'date >> /tmp/log',
                 'echo "<plist version=\\"1.0\\">"',
                 '',
                 '# the last arugment is probably the uuid',

From 61fff7a33731b12f22425132de68c06d7f4208fd Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Wed, 17 Jul 2019 22:26:00 +0000
Subject: [PATCH 400/451] [X86] Make sure we mark 128/256 MLOAD as Legal with
 VLX when min-legal-vector-width=256 is in effect.

This started triggering an assertion after r364718 when we made
these Custom under AVX2.

llvm-svn: 366382
---
 llvm/lib/Target/X86/X86ISelLowering.cpp         | 12 +++++++-----
 llvm/test/CodeGen/X86/min-legal-vector-width.ll | 13 +++++++++++++
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 59540211d5495..15d4bde0167e9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1267,7 +1267,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
-      setOperationAction(ISD::MLOAD,  VT, Custom);
+      setOperationAction(ISD::MLOAD,  VT, Subtarget.hasVLX() ? Legal : Custom);
       setOperationAction(ISD::MSTORE, VT, Legal);
     }
 
@@ -1416,10 +1416,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
     // to 512-bit rather than use the AVX2 instructions so that we can use
     // k-masks.
-    for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
-         MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
-      setOperationAction(ISD::MLOAD,  VT, Subtarget.hasVLX() ? Legal : Custom);
-      setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
+    if (!Subtarget.hasVLX()) {
+      for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
+           MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
+        setOperationAction(ISD::MLOAD,  VT, Custom);
+        setOperationAction(ISD::MSTORE, VT, Custom);
+      }
     }
 
     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index e5ff6014edc34..3d39f96adb591 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -706,3 +706,16 @@ define void @mul512(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min-legal-vect
   store <64 x i8> %f, <64 x i8>* %c
   ret void
 }
+
+; This threw an assertion at one point.
+define <4 x i32> @mload_v4i32(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) "min-legal-vector-width"="256" {
+; CHECK-LABEL: mload_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
+; CHECK-NEXT:    vpblendmd (%rdi), %xmm1, %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)

From 3628a8fae9f30ec024a5781aaf607c96c4ded05a Mon Sep 17 00:00:00 2001
From: Adrian McCarthy <amccarth@google.com>
Date: Wed, 17 Jul 2019 22:36:26 +0000
Subject: [PATCH 401/451] [NFC] Clarify a Cmake status message regarding Python
 on LLDBConfig

llvm-svn: 366383
---
 lldb/cmake/modules/LLDBConfig.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index ccb5de568b43c..ac71136dd026f 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -137,7 +137,7 @@ function(find_python_libs_windows)
          REGEX "^#define[ \t]+PY_VERSION[ \t]+\"[^\"]+\"")
     string(REGEX REPLACE "^#define[ \t]+PY_VERSION[ \t]+\"([^\"+]+)[+]?\".*" "\\1"
          PYTHONLIBS_VERSION_STRING "${python_version_str}")
-    message(STATUS "Found Python version ${PYTHONLIBS_VERSION_STRING}")
+    message(STATUS "Found Python library version ${PYTHONLIBS_VERSION_STRING}")
     string(REGEX REPLACE "([0-9]+)[.]([0-9]+)[.][0-9]+" "python\\1\\2" PYTHONLIBS_BASE_NAME "${PYTHONLIBS_VERSION_STRING}")
     set(PYTHONLIBS_VERSION_STRING "${PYTHONLIBS_VERSION_STRING}" PARENT_SCOPE)
     unset(python_version_str)

From ed9a91ce7ec6f8154ae5ded8c54f1521731622a6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 17 Jul 2019 22:41:53 +0000
Subject: [PATCH 402/451] AMDGPU: Set inaccessiblememonly on sendmsg intrinsics

llvm-svn: 366384
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 1f835171386f7..3982444b54018 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -199,9 +199,9 @@ def int_amdgcn_wavefrontsize :
 // The first parameter is s_sendmsg immediate (i16),
 // the second one is copied to m0
 def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
-  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
+  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
 def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
-  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>;
+  Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
 
 def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
   Intrinsic<[], [], [IntrConvergent]>;

From 7872d76a16d30a7ee33068c87aa5bb910f48ea64 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Wed, 17 Jul 2019 22:58:43 +0000
Subject: [PATCH 403/451] [AMDGPU] Simplify
 AMDGPUInstPrinter::printRegOperand()

Differential Revision: https://reviews.llvm.org/D64892

llvm-svn: 366385
---
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 184 +++---------------
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |  10 +-
 2 files changed, 37 insertions(+), 157 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 7f8b788efde12..01b53432cbb73 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -278,88 +278,8 @@ void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
 
 void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
                                         const MCRegisterInfo &MRI) {
+#if !defined(NDEBUG)
   switch (RegNo) {
-  case AMDGPU::VCC:
-    O << "vcc";
-    return;
-  case AMDGPU::SRC_VCCZ:
-    O << "src_vccz";
-    return;
-  case AMDGPU::SRC_EXECZ:
-    O << "src_execz";
-    return;
-  case AMDGPU::SRC_SCC:
-    O << "src_scc";
-    return;
-  case AMDGPU::EXEC:
-    O << "exec";
-    return;
-  case AMDGPU::M0:
-    O << "m0";
-    return;
-  case AMDGPU::SGPR_NULL:
-    O << "null";
-    return;
-  case AMDGPU::FLAT_SCR:
-    O << "flat_scratch";
-    return;
-  case AMDGPU::XNACK_MASK:
-    O << "xnack_mask";
-    return;
-  case AMDGPU::SRC_SHARED_BASE:
-    O << "src_shared_base";
-    return;
-  case AMDGPU::SRC_SHARED_LIMIT:
-    O << "src_shared_limit";
-    return;
-  case AMDGPU::SRC_PRIVATE_BASE:
-    O << "src_private_base";
-    return;
-  case AMDGPU::SRC_PRIVATE_LIMIT:
-    O << "src_private_limit";
-    return;
-  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
-    O << "src_pops_exiting_wave_id";
-    return;
-  case AMDGPU::LDS_DIRECT:
-    O << "src_lds_direct";
-    return;
-  case AMDGPU::VCC_LO:
-    O << "vcc_lo";
-    return;
-  case AMDGPU::VCC_HI:
-    O << "vcc_hi";
-    return;
-  case AMDGPU::TBA_LO:
-    O << "tba_lo";
-    return;
-  case AMDGPU::TBA_HI:
-    O << "tba_hi";
-    return;
-  case AMDGPU::TMA_LO:
-    O << "tma_lo";
-    return;
-  case AMDGPU::TMA_HI:
-    O << "tma_hi";
-    return;
-  case AMDGPU::EXEC_LO:
-    O << "exec_lo";
-    return;
-  case AMDGPU::EXEC_HI:
-    O << "exec_hi";
-    return;
-  case AMDGPU::FLAT_SCR_LO:
-    O << "flat_scratch_lo";
-    return;
-  case AMDGPU::FLAT_SCR_HI:
-    O << "flat_scratch_hi";
-    return;
-  case AMDGPU::XNACK_MASK_LO:
-    O << "xnack_mask_lo";
-    return;
-  case AMDGPU::XNACK_MASK_HI:
-    O << "xnack_mask_hi";
-    return;
   case AMDGPU::FP_REG:
   case AMDGPU::SP_REG:
   case AMDGPU::SCRATCH_WAVE_OFFSET_REG:
@@ -370,77 +290,37 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
   default:
     break;
   }
-
-  // The low 8 bits of the encoding value is the register index, for both VGPRs
-  // and SGPRs.
-  unsigned RegIdx = MRI.getEncodingValue(RegNo) & ((1 << 8) - 1);
-
-  unsigned NumRegs;
-  if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 1;
-  } else  if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 1;
-  } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo)) {
-    O <<'v';
-    NumRegs = 2;
-  } else  if (MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 2;
-  } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 4;
-  } else  if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 4;
-  } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 3;
-  } else if (MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 3;
-  } else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 5;
-  } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 8;
-  } else if (MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 8;
-  } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) {
-    O << 'v';
-    NumRegs = 16;
-  } else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) {
-    O << 's';
-    NumRegs = 16;
-  } else  if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(RegNo)) {
-    O << 'a';
-    NumRegs = 1;
-  } else  if (MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo)) {
-    O << 'a';
-    NumRegs = 2;
-  } else  if (MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo)) {
-    O << 'a';
-    NumRegs = 4;
-  } else  if (MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo)) {
-    O << 'a';
-    NumRegs = 16;
-  } else  if (MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo)) {
-    O << 'a';
-    NumRegs = 32;
-  } else {
-    O << getRegisterName(RegNo);
-    return;
-  }
-
-  if (NumRegs == 1) {
-    O << RegIdx;
-    return;
-  }
-
-  O << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
+#endif
+
+  unsigned AltName = AMDGPU::Reg32;
+
+  if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo) ||
+      MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo) ||
+      MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg64;
+  else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg128;
+  else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SReg_96RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg96;
+  else if (MRI.getRegClass(AMDGPU::VReg_160RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SReg_160RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg160;
+  else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg256;
+  else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg512;
+  else if (MRI.getRegClass(AMDGPU::VReg_1024RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::SReg_1024RegClassID).contains(RegNo) ||
+           MRI.getRegClass(AMDGPU::AReg_1024RegClassID).contains(RegNo))
+    AltName = AMDGPU::Reg1024;
+
+  O << getRegisterName(RegNo, AltName);
 }
 
 void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 59a2d89ca2c2c..d5948a7862ccd 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -103,7 +103,7 @@ def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
 def EXEC_LO : SIReg<"exec_lo", 126>;
 def EXEC_HI : SIReg<"exec_hi", 127>;
 
-def EXEC : SIRegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
+def EXEC : SIRegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>,
            DwarfRegAlias<EXEC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
@@ -129,7 +129,7 @@ def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
 def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
 def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>;
 
-def LDS_DIRECT : SIReg <"lds_direct", 254>;
+def LDS_DIRECT : SIReg <"src_lds_direct", 254>;
 
 def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
 def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
@@ -191,19 +191,19 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
 
 // SGPR registers
 foreach Index = 0-105 in {
-  def SGPR#Index : SIReg <"SGPR"#Index, Index, "S">;
+  def SGPR#Index : SIReg <"SGPR"#Index, Index, "s">;
 }
 
 // VGPR registers
 foreach Index = 0-255 in {
-  def VGPR#Index : SIReg <"VGPR"#Index, Index, "V"> {
+  def VGPR#Index : SIReg <"VGPR"#Index, Index, "v"> {
     let HWEncoding{8} = 1;
   }
 }
 
 // AccVGPR registers
 foreach Index = 0-255 in {
-  def AGPR#Index : SIReg <"AGPR"#Index, Index, "A"> {
+  def AGPR#Index : SIReg <"AGPR"#Index, Index, "a"> {
     let HWEncoding{8} = 1;
   }
 }

From 7bb5fc058314dba81dd652d8dcc74e133db0b445 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Wed, 17 Jul 2019 22:59:52 +0000
Subject: [PATCH 404/451] llvm-pdbdump: Fix several smaller issues with
 injected source compression handling

- getCompression() used to return a PDB_SourceCompression even though
  the docs for IDiaInjectedSource are explicit about the return value
  being compiler-dependent. Return a uint32_t instead, and make the
  printing code handle unknown values better by printing "Unknown" and
  the int value instead of not printing any compression.

- Print compressed contents as hex dump, not as string.

- Add compression type "DotNet", which is used (at least) by csc.exe,
  the C# compiler. Also add a lengthy comment describing the stream
  contents (derived from looking at the raw hex contents long enough
  to see the GUIDs, which led me to the roslyn and mono implementations
  for handling this).

- The native injected source dumper was dumping the contents of the
  whole data stream -- but csc.exe writes a stream that's padded with
  zero bytes to the next 512 boundary, and the dia api doesn't display
  those padding bytes. So make NativeInjectedSource::getCode() do the
  same thing.

Differential Revision: https://reviews.llvm.org/D64879

llvm-svn: 366386
---
 .../DebugInfo/PDB/DIA/DIAInjectedSource.h     |   2 +-
 .../llvm/DebugInfo/PDB/IPDBInjectedSource.h   |   6 +-
 llvm/include/llvm/DebugInfo/PDB/PDBExtras.h   |   3 +-
 llvm/include/llvm/DebugInfo/PDB/PDBTypes.h    |  68 ++++++++++++++++--
 .../DebugInfo/PDB/DIA/DIAInjectedSource.cpp   |   4 +-
 .../PDB/Native/NativeEnumInjectedSources.cpp  |  11 ++-
 llvm/lib/DebugInfo/PDB/PDBExtras.cpp          |   7 +-
 .../Inputs/dotnet_contents_compressed.pdb     | Bin 0 -> 11776 bytes
 .../Inputs/dotnet_contents_uncompressed.pdb   | Bin 0 -> 11776 bytes
 .../llvm-pdbutil/Inputs/dotnet_hashonly.pdb   | Bin 0 -> 11776 bytes
 .../llvm-pdbutil/injected-sources-native.test |  45 ++++++++++++
 .../tools/llvm-pdbutil/injected-sources.test  |  45 ++++++++++++
 llvm/tools/llvm-pdbutil/LinePrinter.h         |   3 +-
 llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp      |  14 ++--
 14 files changed, 181 insertions(+), 27 deletions(-)
 create mode 100644 llvm/test/tools/llvm-pdbutil/Inputs/dotnet_contents_compressed.pdb
 create mode 100644 llvm/test/tools/llvm-pdbutil/Inputs/dotnet_contents_uncompressed.pdb
 create mode 100644 llvm/test/tools/llvm-pdbutil/Inputs/dotnet_hashonly.pdb

diff --git a/llvm/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h b/llvm/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
index 8be06f80fc6e4..67963a06d9396 100644
--- a/llvm/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
+++ b/llvm/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h
@@ -25,7 +25,7 @@ class DIAInjectedSource : public IPDBInjectedSource {
   std::string getFileName() const override;
   std::string getObjectFileName() const override;
   std::string getVirtualFileName() const override;
-  PDB_SourceCompression getCompression() const override;
+  uint32_t getCompression() const override;
   std::string getCode() const override;
 
 private:
diff --git a/llvm/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h b/llvm/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
index 56e85d1faecdf..d5b36f9846b52 100644
--- a/llvm/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
+++ b/llvm/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_DEBUGINFO_PDB_IPDBINJECTEDSOURCE_H
 #define LLVM_DEBUGINFO_PDB_IPDBINJECTEDSOURCE_H
 
-#include "PDBTypes.h"
 #include "llvm/Support/raw_ostream.h"
 #include <memory>
 #include <string>
@@ -32,7 +31,10 @@ class IPDBInjectedSource {
   virtual std::string getFileName() const = 0;
   virtual std::string getObjectFileName() const = 0;
   virtual std::string getVirtualFileName() const = 0;
-  virtual PDB_SourceCompression getCompression() const = 0;
+  // The returned value depends on the PDB producer,
+  // but 0 is guaranteed to mean "no compression".
+  // The enum PDB_SourceCompression lists known return values.
+  virtual uint32_t getCompression() const = 0;
   virtual std::string getCode() const = 0;
 };
 } // namespace pdb
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h b/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
index f5c3a5fcc99ff..45aba013e7c8e 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h
@@ -37,13 +37,12 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_SymType &Tag);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_MemberAccess &Access);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_UdtType &Type);
 raw_ostream &operator<<(raw_ostream &OS, const PDB_Machine &Machine);
-raw_ostream &operator<<(raw_ostream &OS,
-                        const PDB_SourceCompression &Compression);
 
 raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
 raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
 raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
 
+raw_ostream& dumpPDBSourceCompression(raw_ostream& OS, uint32_t Compression);
 
 template <typename T>
 void dumpSymbolField(raw_ostream &OS, StringRef Name, T Value, int Indent) {
diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index 742cb857a3360..c26d8d1ed10c9 100644
--- a/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -146,11 +146,69 @@ enum class PDB_Machine {
   WceMipsV2 = 0x169
 };
 
-enum class PDB_SourceCompression {
-  None,
-  RunLengthEncoded,
-  Huffman,
-  LZ,
+// A struct with an inner unnamed enum with explicit underlying type results
+// in an enum class that can implicitly convert to the underlying type, which
+// is convenient for this enum.
+struct PDB_SourceCompression {
+  enum : uint32_t {
+    // No compression. Produced e.g. by `link.exe /natvis:foo.natvis`.
+    None,
+    // Not known what produces this.
+    RunLengthEncoded,
+    // Not known what produces this.
+    Huffman,
+    // Not known what produces this.
+    LZ,
+    // Produced e.g. by `csc /debug`. The encoded data is its own mini-stream
+    // with the following layout (in little endian):
+    //   GUID LanguageTypeGuid;
+    //   GUID LanguageVendorGuid;
+    //   GUID DocumentTypeGuid;
+    //   GUID HashFunctionGuid;
+    //   uint32_t HashDataSize;
+    //   uint32_t CompressedDataSize;
+    // Followed by HashDataSize bytes containing a hash checksum,
+    // followed by CompressedDataSize bytes containing source contents.
+    //
+    // CompressedDataSize can be 0, in this case only the hash data is present.
+    // (CompressedDataSize is != 0 e.g. if `/embed` is passed to csc.exe.)
+    // The compressed data format is:
+    //   uint32_t UncompressedDataSize;
+    // If UncompressedDataSize is 0, the data is stored uncompressed and
+    // CompressedDataSize stores the uncompressed size.
+    // If UncompressedDataSize is != 0, then the data is in raw deflate
+    // encoding as described in rfc1951.
+    //
+    // A GUID is 16 bytes, stored in the usual
+    //   uint32_t
+    //   uint16_t
+    //   uint16_t
+    //   uint8_t[8]
+    // layout.
+    //
+    // Well-known GUIDs for LanguageTypeGuid are:
+    //   63a08714-fc37-11d2-904c-00c04fa302a1 C
+    //   3a12d0b7-c26c-11d0-b442-00a0244a1dd2 C++
+    //   3f5162f8-07c6-11d3-9053-00c04fa302a1 C#
+    //   af046cd1-d0e1-11d2-977c-00a0c9b4d50c Cobol
+    //   ab4f38c9-b6e6-43ba-be3b-58080b2ccce3 F#
+    //   3a12d0b4-c26c-11d0-b442-00a0244a1dd2 Java
+    //   3a12d0b6-c26c-11d0-b442-00a0244a1dd2 JScript
+    //   af046cd2-d0e1-11d2-977c-00a0c9b4d50c Pascal
+    //   3a12d0b8-c26c-11d0-b442-00a0244a1dd2 Visual Basic
+    //
+    // Well-known GUIDs for LanguageVendorGuid are:
+    //   994b45c4-e6e9-11d2-903f-00c04fa302a1 Microsoft
+    //
+    // Well-known GUIDs for DocumentTypeGuid are:
+    //   5a869d0b-6611-11d3-bd2a-0000f80849bd Text
+    //
+    // Well-known GUIDs for HashFunctionGuid are:
+    //   406ea660-64cf-4c82-b6f0-42d48172a799 MD5    (HashDataSize is 16)
+    //   ff1816ec-aa5e-4d10-87f7-6f4963833460 SHA1   (HashDataSize is 20)
+    //   8829d00f-11b8-4213-878b-770e8597ac16 SHA256 (HashDataSize is 32)
+    DotNet = 101,
+  };
 };
 
 /// These values correspond to the CV_call_e enumeration, and are documented
diff --git a/llvm/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp b/llvm/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
index 211b4e11ac462..032b230b5faa0 100644
--- a/llvm/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
+++ b/llvm/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp
@@ -41,11 +41,11 @@ std::string DIAInjectedSource::getVirtualFileName() const {
                           &IDiaInjectedSource::get_virtualFilename);
 }
 
-PDB_SourceCompression DIAInjectedSource::getCompression() const {
+uint32_t DIAInjectedSource::getCompression() const {
   DWORD Compression = 0;
   if (S_OK != SourceFile->get_sourceCompression(&Compression))
     return PDB_SourceCompression::None;
-  return static_cast<PDB_SourceCompression>(Compression);
+  return static_cast<uint32_t>(Compression);
 }
 
 std::string DIAInjectedSource::getCode() const {
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
index 7c7901b708cc8..f17ff5bb01f21 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -17,14 +17,15 @@ namespace pdb {
 
 namespace {
 
-Expected<std::string> readStreamData(BinaryStream &Stream) {
-  uint32_t Offset = 0, DataLength = Stream.getLength();
+Expected<std::string> readStreamData(BinaryStream &Stream, uint32_t Limit) {
+  uint32_t Offset = 0, DataLength = std::min(Limit, Stream.getLength());
   std::string Result;
   Result.reserve(DataLength);
   while (Offset < DataLength) {
     ArrayRef<uint8_t> Data;
     if (auto E = Stream.readLongestContiguousChunk(Offset, Data))
       return std::move(E);
+    Data = Data.take_front(DataLength - Offset);
     Offset += Data.size();
     Result += toStringRef(Data);
   }
@@ -62,9 +63,7 @@ class NativeInjectedSource final : public IPDBInjectedSource {
     return *VName;
   }
 
-  PDB_SourceCompression getCompression() const override {
-    return static_cast<PDB_SourceCompression>(Entry.Compression);
-  }
+  uint32_t getCompression() const override { return Entry.Compression; }
 
   std::string getCode() const override {
     // Get name of stream storing the data.
@@ -81,7 +80,7 @@ class NativeInjectedSource final : public IPDBInjectedSource {
       return "(failed to open data stream)";
     }
 
-    auto Data = readStreamData(**ExpectedFileStream);
+    auto Data = readStreamData(**ExpectedFileStream, Entry.FileSize);
     if (!Data) {
       consumeError(Data.takeError());
       return "(failed to read data)";
diff --git a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
index 59eadd71856c3..354a99476c4bf 100644
--- a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -320,14 +320,17 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
   return OS;
 }
 
-raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
-                                   const PDB_SourceCompression &Compression) {
+raw_ostream &llvm::pdb::dumpPDBSourceCompression(raw_ostream &OS,
+                                                 uint32_t Compression) {
   switch (Compression) {
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, None, OS)
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, Huffman, OS)
     CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, LZ, OS)
     CASE_OUTPUT_ENUM_CLASS_STR(PDB_SourceCompression, RunLengthEncoded, "RLE",
                                OS)
+    CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, DotNet, OS)
+  default:
+    OS << "Unknown (" << Compression << ")";
   }
   return OS;
 }
diff --git a/llvm/test/tools/llvm-pdbutil/Inputs/dotnet_contents_compressed.pdb b/llvm/test/tools/llvm-pdbutil/Inputs/dotnet_contents_compressed.pdb
new file mode 100644
index 0000000000000000000000000000000000000000..b031385099cde50b8a0c4c56dfaf689c7513ebd7
GIT binary patch
literal 11776
zcmeHNe`p(39DkQ~uDPZ^GG{hBk*h4Qu5B*eX4j6fidkH<HO|_JY}9RCmeO<A93*Ah
z30sFU?LRV#gGCr@s}rLjf(n~Dtzd`Q+6^349R8s~2Z}f+;#LNWpYPp0iKPUJh-v!1
z<a6J5@B8lEd!O(7aqqo^wMg7Z7~OqJKn>K^D&daDl(jy;zf7*$*ufwU^T(DT_&nW1
z?z5D*761by86oqTfhHzez<l<9>BR=AiN>)?8&KselPQMIWQuSVQpgO<<o^oc|3Y^8
zKkD|HuR(f4SD%p(8~yBi&xWR3fuR?M;+x)k_e-9TY=mt2u6_0NsBE(c&k+V@>i;w!
zW{>}LPt}>O=SUSrF>^2={GWrId0@J|r@iIE!mIA19c*Olu;bJh!7a(3u7Bk|+G5?8
zo;b3@-F?-4ekEhKOG4*=@hr{ke7iFI@+~72d2!9|#o*{5@Z@;o$!Cu*A0I9X1`eh6
z@0^HVtG<KyOIyam^}g1&$DjJGbv&J}xT0PRwZC<Ja`WX-o$tsihtJCLsCUqtOv}-!
zWR3jkV6wmJb$LSm{Me{Bb?(6Bss7Qa=G1{7f=O>`L$W_rwYl=d;AoAu{oCfr$@Elm
zG88<fCC_MQw4!R)pT|!Rz4_sz_3DyhF*(hy@s`=ixt`tABK2Hl;O>pPH>j^PQUwC*
zIur3or>^hst?xBrMrUhO*9~7J!B}LvsfU~BOJNy6xjX=nh;^gIL;hcv{pgnlP|R)L
zC}*#%pO9udZc=(|5u4uU&r&N`0Z`sf>D=;bww+oD*`k1u{9u53uaB*CQ)znaB4y73
z@aa7iZTdFcrNt0l#fpp5Ek9p)fBV;=PrOdY$M1Zwo6>~YM%Wmm$(Qn_EhTCjBo1Wr
zJM?<<j`EWje-DioemfrF0eTl{m#sZGeOSsau`xEz{y<y_V3x-YE6{5IA+u+I)^t@)
z^(Ohs1!q-56SoiB8tPw*k|PK1#~ew7DGwN+z9^*d7<iDCn`1xvA;$T@)^P2vV5~3R
zzv~G@i}f+Kh<Qyrjpq`xCIAns$%^wg`bioJj~mp8rWzrFfr4c~!JJQhdnV!E2Gj{k
zkdEf+c{X1ceE%mOKj&;P*L|g|@>LgmNh`xt&<S5FKpn6G;Nn5xe)Hzn5guoq4rHGX
za0_Xf>Gth5=;`gp++ldyi0gZOW{@?m^#_|*1p-K0|B>$|PtVs1c2=fxU{z6OCXoI<
zo1k>mR!OLCT+@1kzdgq`2jlx8cDD4mC6t*ZPBUXG%&l`)=Q{t}gj%U>T5Mk^)@?8~
z*3}zLFcmvQYEQIlZ#2F~HzND2h;B`fCe+-UM6`}1TG!cR(Kp0GKnBb;CiDW}A%OQg
zam)xZ{!1xG<_a(j(EmVM;34z>APDaO1_0W>p)p_#s6*d+1ZYDTYjk!mK+anPaA%ex
lOb&DcE`a(Zy@wo5>&yy3NPaM|I6sOZN+B2!4Ezra`~^VWUi$z5

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-pdbutil/Inputs/dotnet_contents_uncompressed.pdb b/llvm/test/tools/llvm-pdbutil/Inputs/dotnet_contents_uncompressed.pdb
new file mode 100644
index 0000000000000000000000000000000000000000..be011cfe6a08ad936e4ee8e5e1ccbfde50b1c27c
GIT binary patch
literal 11776
zcmeHNU1(fI6h6B~o89cD*|w&&6`2MKNz?9b+L-=Wh_Fp<FiR~FYC~+hxqC0U&COof
zyJ-wXh$#A^(g$A(Do7s`g+6GW)Jh&Sps0mbg+f8}L8^!)rC?v|_B(U$v};Hgiin%d
z9QNBYGiP@0%y;JJ&dg?Qqv(3BRT7z0rn6II2lt5`$#gm%UB7pbAYu8TwFo}Xama(d
z(%*H!z-(E|a%NyB=^n70{onL;S7;~CV=V!onyFlFfG(Hs!?%`tW?(V@uhjmpXP5t@
zZEq&KLQ_ua*$`6QeP@3z_H5rjvui$f>Gk(#RYtZkD&_LQ?F*AC%{sh97+7rl^Lki0
z|8tz1vrsRQChBTdVL<!83OUQbbYtXT@7#)uiBp3#d*Dp?-7or{JpJ3R7ZRs>{dm*c
zuM8!ui-~iaiEcFZpZlYAUHQldE!kK9a{G;!x{s{^N3Q{EJKp-d^!HCz`P(Cn-*vw?
z@qK<S!Soe!<EHmq&M?J(({bEGZqdn$lkuqG<UG$$<ERi`DOa)$al*CpBAc@d?H%Hz
z7@YD-=6EvW7ChH6lZT3S$sDi?W_z0~P2{}1b$ra3oHSf}a-=wA*%Q`;CFD~!wskxv
zUWiBi57$M@-5H1nCZ+G5Q(NkZfty!vUga0L9&S`-_pnzqh8^d`_||c^;10`CmNYyf
zW1(r3hclb_O<@DTr92F<=zXKlL+yWW_$3{hMm0Zraps-#dwvMr>q|Z7#}TpgzWOcq
zf_8w*{_^y|>MMbrdkNc2Kuc{fz@s-wo8?+P8dRxr4S-J{;AH7rUY8NxMhy*L_5MEh
z@!=o)Kid=ufBNCfG0u~vy|58AATyWCM@rl`Si&gg8uT6+<MKkJp2M?6I!H%&nxDlw
z7U)*oKCGi>=r9e@b)+={vOYmrKz9RL?wtYN)2)xB=Gj*<IBO#`bNgtZq5idqDkUgC
zR>`7GHNXImMJ@Hmz$4Tw=YEVs%=4K8+0LVVg;H_q=u@s;C=soqP14Tud5HW4pp`l*
z($YO?mah8a1});HMvG>kZW$0*^LcD9ChFaQ2SFq9@mjsi*6X?-|Lo(*NSECAHBrkO
zF?!jKV<{MhuLZCLXa|(EBJhxm{5h-A{M&)b`vGNP9VI7-J`KIF|J*qhOv)hj>`3?R
z+Lp;6@77O}^D4jED+EPJ6~?Y2E;EE~-%W5n?yD@^Hl=yLp`Kpln1j@LNKh>Q+!rp)
zmxwIb4=d}E)z$9*o<OToIBp&5FIX<6<WVlAa6*(CHFJ5hIO4d*n4e(Tj_IYU9}@9C
zl4;!?kj3v1D}X2<_n6T418smBclw$UWc-)XjKT$Q2H^jJ^uj~({~!nt0n@-0faid3
zfGrqXj{^e;V~<X&0e0RhKsmDsVRm2yhygq%`8n)x-e;}^wA2OzYigq!x)z!N&A|V_
Fz%6{tQvUz|

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-pdbutil/Inputs/dotnet_hashonly.pdb b/llvm/test/tools/llvm-pdbutil/Inputs/dotnet_hashonly.pdb
new file mode 100644
index 0000000000000000000000000000000000000000..f0c40e9f4827490fb76c178eb3ca27d0a7785761
GIT binary patch
literal 11776
zcmeHNOK4L;6upVnHvQSuTEA!%3azpEXssW_ifPrNp_U5TQmt*8*w8c&lKR^f6vd6Y
z5Jk|1bRnqVLRV^~h!hk*VAYL+;6m_&QW11x<GJ(RNDL()BBn2M$;q8JcQSA0-1$%Z
z;Xq7_YoUIJ&+T(M9sZV$j+HKtrzn4JeG5UX;zP3#Jjl<GnYvQnCSYJVp(L9bSVgJ@
zWV8PZ9(XOQcpNJ+098zhL=JQ!F$LdB%*?<<{-3V=Z)TVOR=3kGujOv9dzS^V3(p?k
z`|^r&!<q9ZwRKh2U&}nUF_Ps~^YYZFWU~rS5(Xyf|2!WikN^D4)k)Qpq>8GTX&6xc
zPeV=?n7(&4*Nx<k+E2C6aMPgm;=P6~=ia@2YClz{KNnm$+GY=p+J}}9ea~wg`dB_Y
zvHNm~|JXOJF>ttMPnp5dtg7>O`hUC*B_8e0dt7sA-?Q!!dlt}Vnkp6<HIdph6;l;S
z%fR^O@z1=FtL0R_HSO_Opgj`V-&ftIMYZ<L!AL}N1>!`3R8x;M@k(JHz`4u^Sk%5z
z=OOdI=e_Y*22jimHP^gP);_oV>7t&~<A_*zU;dU`K_$R>U;R*A(r(zfm9Q-cD9H>4
zxc9nfiI}T<j3On^0r2TGjuyW4cZQ<~FQc5C`*rU}uD8Buyj@|l-nw#Q52x`{J#0id
zNX+^2krKBJ7Avy(0=-r|aefw(@8QwHW5gpo!0%#RXy|g>KFp?_)Jkpi6>)ihD31|#
zK-U0D{+t2c)6KQH$JkeeaMmJd=5~dlq5fJNX>yQ$Op`>JGJpZ@i%QJLzyc~3V?X*K
z#(7_p-`UX+?T;Pm*rtV}{Y0fyA?!S!TS%V(%Bd<T&Qt4Q@tTht)QG1VC5i#lGT^|R
z&wYC$k!u6)1bIlub9I)@*QW3P?BfGAuh{n$P|1lxIub6zRKR;S2e1^V1f-NBFh@Lm
z8kTYT?LhMVfV8mAr-<>m9XhrBh!ch#S}fA-5<#k6>1kNK%;!Ve>YJ`HnO^P{jI4yR
zVpmZl5?FrSO>jDHt1MhMsd>L4-=5}}gXDRLkuCpR7tTx<o5)xXOY5Z7>F)nFp;qp#
z;b?DTG^CL`+SwP3lN%>Q?w(+0cQDo!(E`1CL?|2y#@*=;iFhB$w5~D8;x$ArkPnDG
zCiGNbF(CV$x@H6!|5A#PxdRvk_<taE@R0aF2*Pc^0Kj7bj{y&XrRZDhfvpH*k4`fH
sc3vqUomqe|JJ1Fc0^BG0J?wDaXHEx{WCjCenNbW?3dMk8;6E_%6RaK{!2kdN

literal 0
HcmV?d00001

diff --git a/llvm/test/tools/llvm-pdbutil/injected-sources-native.test b/llvm/test/tools/llvm-pdbutil/injected-sources-native.test
index 374f14fc32102..17388244b6adf 100644
--- a/llvm/test/tools/llvm-pdbutil/injected-sources-native.test
+++ b/llvm/test/tools/llvm-pdbutil/injected-sources-native.test
@@ -28,3 +28,48 @@
 
 ; NEGATIVE:      ---INJECTED SOURCES---
 ; NEGATIVE-NEXT: There are no injected sources.
+
+; PDB created by running `csc /debug Hello.cs`
+; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
+; RUN:   %p/Inputs/dotnet_hashonly.pdb | FileCheck --check-prefix=HASH %s
+
+; HASH:      ---INJECTED SOURCES---
+; HASH:      C:\src\llvm-mono\Hello.cs (92 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=269413292, compression=DotNet
+; HASH-NEXT: Compressed data (
+; HASH-NEXT:   0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1  |.bQ?.....S..O....EK......?..O...|
+; HASH-NEXT:   0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460  |...Z.f...*....I.....^..M..oIc.4`|
+; HASH-NEXT:   0040: 14000000 00000000 17299CBE 74FDDF66 FFCD5E08 CE34A775 D464C611           |.........)..t..f..^..4.u.d..|
+; HASH-NEXT: )
+
+; PDB created by running `csc /debug /embed Hello.cs` with Hello.cs smaller than 200 bytes.
+; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
+; RUN:   %p/Inputs/dotnet_contents_uncompressed.pdb | FileCheck --check-prefix=UNCOMP %s
+
+; UNCOMP:      ---INJECTED SOURCES---
+; UNCOMP:      C:\src\llvm-mono\Hello.cs (232 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=323787205, compression=DotNet
+; UNCOMP-NEXT: Compressed data (
+; UNCOMP-NEXT:   0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1  |.bQ?.....S..O....EK......?..O...|
+; UNCOMP-NEXT:   0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460  |...Z.f...*....I.....^..M..oIc.4`|
+; UNCOMP-NEXT:   0040: 14000000 8C000000 17299CBE 74FDDF66 FFCD5E08 CE34A775 D464C611 00000000  |.........)..t..f..^..4.u.d......|
+; UNCOMP-NEXT:   0060: 6E616D65 73706163 65204865 6C6C6F57 6F726C64 207B0D0A 636C6173 73204865  |namespace HelloWorld {..class He|
+; UNCOMP-NEXT:   0080: 6C6C6F20 7B0D0A20 20737461 74696320 766F6964 204D6169 6E282920 7B205379  |llo {..  static void Main() { Sy|
+; UNCOMP-NEXT:   00A0: 7374656D 2E436F6E 736F6C65 2E577269 74654C69 6E652822 48656C6C 6F206173  |stem.Console.WriteLine("Hello as|
+; UNCOMP-NEXT:   00C0: 64666A6B 6C777763 6F697762 72796669 75667566 20576F72 6C642122 293B207D  |dfjklwwcoiwbryfiufuf World!"); }|
+; UNCOMP-NEXT:   00E0: 0D0A7D0D 0A7D0D0A                                                        |..}..}..|
+; UNCOMP-NEXT: )
+
+; PDB created by running `csc /debug /embed Hello.cs` with Hello.cs larger than 200 bytes.
+; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
+; RUN:   %p/Inputs/dotnet_contents_compressed.pdb | FileCheck --check-prefix=COMP %s
+
+; COMP:      ---INJECTED SOURCES---
+; COMP:      C:\src\llvm-mono\Hello.cs (218 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=616104201, compression=DotNet
+; COMP-NEXT: Compressed data (
+; COMP-NEXT:   0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1  |.bQ?.....S..O....EK......?..O...|
+; COMP-NEXT:   0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460  |...Z.f...*....I.....^..M..oIc.4`|
+; COMP-NEXT:   0040: 14000000 7E000000 52CD36A0 6A9824CD A3034543 7FA9765E D572DA21 FB000000  |....~...R.6.j.$...EC..v^.r.!....|
+; COMP-NEXT:   0060: CB4BCC4D 2D2E484C 4E55F048 CDC9C90F CF2FCA49 51A8E6E5 4ACE492C 2E868881  |.K.M-.HLNU.H...../.IQ...J.I,....|
+; COMP-NEXT:   0080: B80A0AC5 25892599 C90A65F9 99290ABE 8999791A 9A0AD50A C195C525 A9B97ACE  |....%.%...e..)....y........%..z.|
+; COMP-NEXT:   00A0: F979C5F9 39A97AE1 459925A9 3E9979A9 1A4A109D 89C52969 59D939E5 E5C9F999  |.y..9.z.E.%.>.y..J....)iY.9.....|
+; COMP-NEXT:   00C0: E5494595 6999A569 A5690321 0CF698A2 92A6B542 2D2F1704 0100               |.IE.i..i.i.!.......B-/....|
+; COMP-NEXT: )
diff --git a/llvm/test/tools/llvm-pdbutil/injected-sources.test b/llvm/test/tools/llvm-pdbutil/injected-sources.test
index f3bbfb49e2881..71fb392dae98c 100644
--- a/llvm/test/tools/llvm-pdbutil/injected-sources.test
+++ b/llvm/test/tools/llvm-pdbutil/injected-sources.test
@@ -27,3 +27,48 @@
 
 ; NEGATIVE:      ---INJECTED SOURCES---
 ; NEGATIVE-NEXT: There are no injected sources.
+
+; PDB created by running `csc /debug Hello.cs`
+; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
+; RUN:   %p/Inputs/dotnet_hashonly.pdb | FileCheck --check-prefix=HASH %s
+
+; HASH:      ---INJECTED SOURCES---
+; HASH:      C:\src\llvm-mono\Hello.cs (92 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=269413292, compression=DotNet
+; HASH-NEXT: Compressed data (
+; HASH-NEXT:   0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1  |.bQ?.....S..O....EK......?..O...|
+; HASH-NEXT:   0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460  |...Z.f...*....I.....^..M..oIc.4`|
+; HASH-NEXT:   0040: 14000000 00000000 17299CBE 74FDDF66 FFCD5E08 CE34A775 D464C611           |.........)..t..f..^..4.u.d..|
+; HASH-NEXT: )
+
+; PDB created by running `csc /debug /embed Hello.cs` with Hello.cs smaller than 200 bytes.
+; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
+; RUN:   %p/Inputs/dotnet_contents_uncompressed.pdb | FileCheck --check-prefix=UNCOMP %s
+
+; UNCOMP:      ---INJECTED SOURCES---
+; UNCOMP:      C:\src\llvm-mono\Hello.cs (232 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=323787205, compression=DotNet
+; UNCOMP-NEXT: Compressed data (
+; UNCOMP-NEXT:   0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1  |.bQ?.....S..O....EK......?..O...|
+; UNCOMP-NEXT:   0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460  |...Z.f...*....I.....^..M..oIc.4`|
+; UNCOMP-NEXT:   0040: 14000000 8C000000 17299CBE 74FDDF66 FFCD5E08 CE34A775 D464C611 00000000  |.........)..t..f..^..4.u.d......|
+; UNCOMP-NEXT:   0060: 6E616D65 73706163 65204865 6C6C6F57 6F726C64 207B0D0A 636C6173 73204865  |namespace HelloWorld {..class He|
+; UNCOMP-NEXT:   0080: 6C6C6F20 7B0D0A20 20737461 74696320 766F6964 204D6169 6E282920 7B205379  |llo {..  static void Main() { Sy|
+; UNCOMP-NEXT:   00A0: 7374656D 2E436F6E 736F6C65 2E577269 74654C69 6E652822 48656C6C 6F206173  |stem.Console.WriteLine("Hello as|
+; UNCOMP-NEXT:   00C0: 64666A6B 6C777763 6F697762 72796669 75667566 20576F72 6C642122 293B207D  |dfjklwwcoiwbryfiufuf World!"); }|
+; UNCOMP-NEXT:   00E0: 0D0A7D0D 0A7D0D0A                                                        |..}..}..|
+; UNCOMP-NEXT: )
+
+; PDB created by running `csc /debug /embed Hello.cs` with Hello.cs larger than 200 bytes.
+; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
+; RUN:   %p/Inputs/dotnet_contents_compressed.pdb | FileCheck --check-prefix=COMP %s
+
+; COMP:      ---INJECTED SOURCES---
+; COMP:      C:\src\llvm-mono\Hello.cs (218 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=616104201, compression=DotNet
+; COMP-NEXT: Compressed data (
+; COMP-NEXT:   0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1  |.bQ?.....S..O....EK......?..O...|
+; COMP-NEXT:   0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460  |...Z.f...*....I.....^..M..oIc.4`|
+; COMP-NEXT:   0040: 14000000 7E000000 52CD36A0 6A9824CD A3034543 7FA9765E D572DA21 FB000000  |....~...R.6.j.$...EC..v^.r.!....|
+; COMP-NEXT:   0060: CB4BCC4D 2D2E484C 4E55F048 CDC9C90F CF2FCA49 51A8E6E5 4ACE492C 2E868881  |.K.M-.HLNU.H...../.IQ...J.I,....|
+; COMP-NEXT:   0080: B80A0AC5 25892599 C90A65F9 99290ABE 8999791A 9A0AD50A C195C525 A9B97ACE  |....%.%...e..)....y........%..z.|
+; COMP-NEXT:   00A0: F979C5F9 39A97AE1 459925A9 3E9979A9 1A4A109D 89C52969 59D939E5 E5C9F999  |.y..9.z.E.%.>.y..J....)iY.9.....|
+; COMP-NEXT:   00C0: E5494595 6999A569 A5690321 0CF698A2 92A6B542 2D2F1704 0100               |.IE.i..i.i.!.......B-/....|
+; COMP-NEXT: )
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.h b/llvm/tools/llvm-pdbutil/LinePrinter.h
index 74e341267accd..7ecfae17354f0 100644
--- a/llvm/tools/llvm-pdbutil/LinePrinter.h
+++ b/llvm/tools/llvm-pdbutil/LinePrinter.h
@@ -132,8 +132,7 @@ struct AutoIndent {
 
 template <class T>
 inline raw_ostream &operator<<(LinePrinter &Printer, const T &Item) {
-  Printer.getStream() << Item;
-  return Printer.getStream();
+  return Printer.getStream() << Item;
 }
 
 enum class PDB_ColorItem {
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index e6e89d4bf2201..785a980867919 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -947,9 +947,6 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
     std::string VFName = stringOr(IS->getVirtualFileName(), "<null>");
     uint32_t CRC = IS->getCrc32();
 
-    std::string CompressionStr;
-    llvm::raw_string_ostream Stream(CompressionStr);
-    Stream << IS->getCompression();
     WithColor(Printer, PDB_ColorItem::Path).get() << File;
     Printer << " (";
     WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Size;
@@ -968,7 +965,9 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
     Printer << ", ";
     WithColor(Printer, PDB_ColorItem::Keyword).get() << "compression";
     Printer << "=";
-    WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Stream.str();
+    dumpPDBSourceCompression(
+        WithColor(Printer, PDB_ColorItem::LiteralValue).get(),
+        IS->getCompression());
 
     if (!opts::pretty::ShowInjectedSourceContent)
       continue;
@@ -977,7 +976,12 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
     int Indent = Printer.getIndentLevel();
     Printer.Unindent(Indent);
 
-    Printer.printLine(IS->getCode());
+    if (IS->getCompression() == PDB_SourceCompression::None)
+      Printer.printLine(IS->getCode());
+    else
+      Printer.formatBinary("Compressed data",
+                           arrayRefFromStringRef(IS->getCode()),
+                           /*StartOffset=*/0);
 
     // Re-indent back to the original level.
     Printer.Indent(Indent);

From 3eab4819f25afad53060e8d5bb97c7eab46bb586 Mon Sep 17 00:00:00 2001
From: Denis Bakhvalov <denis.bakhvalov@intel.com>
Date: Wed, 17 Jul 2019 23:28:39 +0000
Subject: [PATCH 405/451] [llvm-bcanalyzer] Fixed error 'Expected<T> must be
 checked before access or destruction'

After rL365286 I had a failing test:
  LLVM :: tools/gold/X86/v1.12/thinlto_emit_linked_objects.ll

It was failing with the output:
$ llvm-bcanalyzer --dump llvm/test/tools/gold/X86/v1.12/Output/thinlto_emit_linked_objects.ll.tmp3.o.thinlto.bc
Expected<T> must be checked before access or destruction.
Unchecked Expected<T> contained error:
Unexpected end of file reading 0 of 0 bytesStack dump:

Change-Id: I07e03262074ea5e0aae7a8d787d5487c87f914a2
llvm-svn: 366387
---
 llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 879405097419d..9c30d563a3147 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -539,8 +539,11 @@ BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer,
 
 Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
                                Optional<StringRef> CheckHash) {
-  if (Expected<CurStreamTypeType> H = analyzeHeader(O, Stream))
-    CurStreamType = *H;
+  Expected<CurStreamTypeType> MaybeType = analyzeHeader(O, Stream);
+  if (!MaybeType)
+    return MaybeType.takeError();
+  else
+    CurStreamType = *MaybeType;
 
   Stream.setBlockInfo(&BlockInfo);
 

From 6abd78cc7c97356c1f6e9bf65b19ef3c9cc0f6b9 Mon Sep 17 00:00:00 2001
From: Evgeniy Stepanov <eugeni.stepanov@gmail.com>
Date: Wed, 17 Jul 2019 23:31:59 +0000
Subject: [PATCH 406/451] Make DT a transitive dependency of LI.

Summary:
LoopInfoWrapperPass::verifyAnalysis uses DT, which means DT must be alive
even if it has no direct users.

Fixes a crash in expensive checks mode.

Reviewers: pcc, leonardchan

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64896

llvm-svn: 366388
---
 llvm/lib/Analysis/LoopInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index c59e48a7a98e1..aa5da0859805f 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -1039,7 +1039,7 @@ void LoopInfoWrapperPass::verifyAnalysis() const {
 
 void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
-  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
 }
 
 void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {

From 749f556bbd146f1bf066a994e7a9a9fdc65ab6a1 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Wed, 17 Jul 2019 23:35:15 +0000
Subject: [PATCH 407/451] hwasan: Use C++ driver for cfi.cc test.

It turns out that this test was only passing by accident. It was relying on
the optimizer to remove the only reference to A's vtable by realizing that
the CFI check will always fail. The vtable contains a reference to RTTI in
libc++, which will be unresolved because the C driver won't link against it.

This was found by my prototype implementation of HWASAN for globals, which
happens to end up preserving the reference.

Differential Revision: https://reviews.llvm.org/D64890

llvm-svn: 366389
---
 compiler-rt/test/hwasan/TestCases/cfi.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/hwasan/TestCases/cfi.cc b/compiler-rt/test/hwasan/TestCases/cfi.cc
index 457e29659e77d..e64b556171e62 100644
--- a/compiler-rt/test/hwasan/TestCases/cfi.cc
+++ b/compiler-rt/test/hwasan/TestCases/cfi.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_hwasan -fsanitize=cfi -fno-sanitize-trap=cfi -flto -fvisibility=hidden -fuse-ld=lld %s -o %t
+// RUN: %clangxx_hwasan -fsanitize=cfi -fno-sanitize-trap=cfi -flto -fvisibility=hidden -fuse-ld=lld %s -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 // REQUIRES: android

From 4e227702197ece69f9545d3c4903c8268a438459 Mon Sep 17 00:00:00 2001
From: Nilanjana Basu <nilanjana.basu87@gmail.com>
Date: Wed, 17 Jul 2019 23:43:58 +0000
Subject: [PATCH 408/451] Changes to display code view debug info type records
 in hex format

llvm-svn: 366390
---
 llvm/include/llvm/MC/MCExpr.h                 |  12 +-
 llvm/include/llvm/MC/MCStreamer.h             |   7 +
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp |   2 +-
 .../DebugInfo/CodeView/TypeRecordMapping.cpp  |   2 +-
 llvm/lib/MC/MCAsmStreamer.cpp                 |   5 +
 llvm/lib/MC/MCExpr.cpp                        |  17 +-
 llvm/test/DebugInfo/COFF/types-basic.ll       | 226 +++++++++---------
 7 files changed, 149 insertions(+), 122 deletions(-)

diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 7d9b265b9d590..fb23c0114c76e 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -134,15 +134,21 @@ inline raw_ostream &operator<<(raw_ostream &OS, const MCExpr &E) {
 ////  Represent a constant integer expression.
 class MCConstantExpr : public MCExpr {
   int64_t Value;
+  bool PrintInHex = false;
 
-  explicit MCConstantExpr(int64_t Value)
+  MCConstantExpr(int64_t Value)
       : MCExpr(MCExpr::Constant, SMLoc()), Value(Value) {}
 
+  MCConstantExpr(int64_t Value, bool PrintInHex)
+      : MCExpr(MCExpr::Constant, SMLoc()), Value(Value),
+        PrintInHex(PrintInHex) {}
+
 public:
   /// \name Construction
   /// @{
 
-  static const MCConstantExpr *create(int64_t Value, MCContext &Ctx);
+  static const MCConstantExpr *create(int64_t Value, MCContext &Ctx,
+                                      bool PrintInHex = false);
 
   /// @}
   /// \name Accessors
@@ -150,6 +156,8 @@ class MCConstantExpr : public MCExpr {
 
   int64_t getValue() const { return Value; }
 
+  bool useHexFormat() const { return PrintInHex; }
+
   /// @}
 
   static bool classof(const MCExpr *E) {
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 67284fb379f38..731e7515448c0 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -626,6 +626,13 @@ class MCStreamer {
   /// to pass in a MCExpr for constant integers.
   virtual void EmitIntValue(uint64_t Value, unsigned Size);
 
+  /// Special case of EmitValue that avoids the client having to pass in
+  /// a MCExpr for constant integers. Streamers that emit textual assembly
+  /// may print the value in hex; the default forwards to EmitIntValue.
+  virtual void EmitIntValueInHex(uint64_t Value, unsigned Size) {
+    EmitIntValue(Value, Size);
+  }
+
   virtual void EmitULEB128Value(const MCExpr *Value);
 
   virtual void EmitSLEB128Value(const MCExpr *Value);
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index bd0ace9e1bb09..932959c311fa1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -103,7 +103,7 @@ class CVMCAdapter : public CodeViewRecordStreamer {
   void EmitBytes(StringRef Data) { OS->EmitBytes(Data); }
 
   void EmitIntValue(uint64_t Value, unsigned Size) {
-    OS->EmitIntValue(Value, Size);
+    OS->EmitIntValueInHex(Value, Size);
   }
 
   void EmitBinaryData(StringRef Data) { OS->EmitBinaryData(Data); }
diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index 8e8eba4d53e7f..47928c2eef642 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -306,7 +306,7 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, VFTableRecord &Record) {
     for (auto Name : Record.MethodNames)
       NamesLen += Name.size() + 1;
   }
-  error(IO.mapInteger(NamesLen, ""));
+  error(IO.mapInteger(NamesLen));
   error(IO.mapVectorTail(
       Record.MethodNames,
       [](CodeViewRecordIO &IO, StringRef &S) {
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 7e8f02e3a1aa5..7a2b0b8a12207 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -188,6 +188,7 @@ class MCAsmStreamer final : public MCStreamer {
   void EmitValueImpl(const MCExpr *Value, unsigned Size,
                      SMLoc Loc = SMLoc()) override;
   void EmitIntValue(uint64_t Value, unsigned Size) override;
+  void EmitIntValueInHex(uint64_t Value, unsigned Size) override;
 
   void EmitULEB128Value(const MCExpr *Value) override;
 
@@ -923,6 +924,10 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
   EmitValue(MCConstantExpr::create(Value, getContext()), Size);
 }
 
+void MCAsmStreamer::EmitIntValueInHex(uint64_t Value, unsigned Size) {
+  EmitValue(MCConstantExpr::create(Value, getContext(), true), Size);
+}
+
 void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
                                   SMLoc Loc) {
   assert(Size <= 8 && "Invalid size");
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index b3384599635ec..ab53ed42778e3 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/MC/MCExpr.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/MC/MCAsmBackend.h"
@@ -42,10 +43,15 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
   switch (getKind()) {
   case MCExpr::Target:
     return cast<MCTargetExpr>(this)->printImpl(OS, MAI);
-  case MCExpr::Constant:
-    OS << cast<MCConstantExpr>(*this).getValue();
+  case MCExpr::Constant: {
+    auto Value = cast<MCConstantExpr>(*this).getValue();
+    auto PrintInHex = cast<MCConstantExpr>(*this).useHexFormat();
+    if (PrintInHex)
+      OS << "0x" << Twine::utohexstr(Value);
+    else
+      OS << Value;
     return;
-
+  }
   case MCExpr::SymbolRef: {
     const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
     const MCSymbol &Sym = SRE.getSymbol();
@@ -160,8 +166,9 @@ const MCUnaryExpr *MCUnaryExpr::create(Opcode Opc, const MCExpr *Expr,
   return new (Ctx) MCUnaryExpr(Opc, Expr, Loc);
 }
 
-const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx) {
-  return new (Ctx) MCConstantExpr(Value);
+const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx,
+                                             bool PrintInHex) {
+  return new (Ctx) MCConstantExpr(Value, PrintInHex);
 }
 
 /* *** */
diff --git a/llvm/test/DebugInfo/COFF/types-basic.ll b/llvm/test/DebugInfo/COFF/types-basic.ll
index a6131988b1c7d..3b62a3920f480 100644
--- a/llvm/test/DebugInfo/COFF/types-basic.ll
+++ b/llvm/test/DebugInfo/COFF/types-basic.ll
@@ -350,12 +350,12 @@
 ; ASM: .section	.debug$T,"dr"
 ; ASM: .p2align	2
 ; ASM: .long	4                       # Debug section magic
-; ASM: .short	18                      # Record length
-; ASM: .short	4609                    # Record kind: LF_ARGLIST
-; ASM: .long	3                       # NumArgs
-; ASM: .long	64                      # Argument
-; ASM: .long	65                      # Argument
-; ASM: .long	19                      # Argument
+; ASM: .short	0x12                    # Record length
+; ASM: .short	0x1201                  # Record kind: LF_ARGLIST
+; ASM: .long	0x3                     # NumArgs
+; ASM: .long	0x40                    # Argument
+; ASM: .long	0x41                    # Argument
+; ASM: .long	0x13                    # Argument
 ; ASM: # ArgList (0x1000) {
 ; ASM: #   TypeLeafKind: LF_ARGLIST (0x1201)
 ; ASM: #   NumArgs: 3
@@ -365,13 +365,13 @@
 ; ASM: #     ArgType: __int64 (0x13)
 ; ASM: #   ]
 ; ASM: # }
-; ASM: .short	14                      # Record length
-; ASM: .short	4104                    # Record kind: LF_PROCEDURE
-; ASM: .long	3                       # ReturnType
-; ASM: .byte	0                       # CallingConvention
-; ASM: .byte	0                       # FunctionOptions
-; ASM: .short	3                       # NumParameters
-; ASM: .long	4096                    # ArgListType
+; ASM: .short	0xe                     # Record length
+; ASM: .short	0x1008                  # Record kind: LF_PROCEDURE
+; ASM: .long	0x3                     # ReturnType
+; ASM: .byte	0x0                     # CallingConvention
+; ASM: .byte	0x0                     # FunctionOptions
+; ASM: .short	0x3                     # NumParameters
+; ASM: .long	0x1000                  # ArgListType
 ; ASM: # Procedure (0x1001) {
 ; ASM: #   TypeLeafKind: LF_PROCEDURE (0x1008)
 ; ASM: #   ReturnType: void (0x3)
@@ -381,10 +381,10 @@
 ; ASM: #   NumParameters: 3
 ; ASM: #   ArgListType: (float, double, __int64) (0x1000)
 ; ASM: # }
-; ASM: .short	14                      # Record length
-; ASM: .short	5633                    # Record kind: LF_FUNC_ID
-; ASM: .long	0                       # ParentScope
-; ASM: .long	4097                    # FunctionType
+; ASM: .short	0xe                     # Record length
+; ASM: .short	0x1601                  # Record kind: LF_FUNC_ID
+; ASM: .long	0x0                     # ParentScope
+; ASM: .long	0x1001                  # FunctionType
 ; ASM: .asciz	"f"                     # Name
 ; ASM: .byte	242
 ; ASM: .byte	241
@@ -394,10 +394,10 @@
 ; ASM: #   FunctionType: void (float, double, __int64) (0x1001)
 ; ASM: #   Name: f
 ; ASM: # }
-; ASM: .short	10                      # Record length
-; ASM: .short	4097                    # Record kind: LF_MODIFIER
-; ASM: .long	116                     # ModifiedType
-; ASM: .short	1                       # Modifiers
+; ASM: .short	0xa                     # Record length
+; ASM: .short	0x1001                  # Record kind: LF_MODIFIER
+; ASM: .long	0x74                    # ModifiedType
+; ASM: .short	0x1                     # Modifiers
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # Modifier (0x1003) {
@@ -407,10 +407,10 @@
 ; ASM: #     Const (0x1)
 ; ASM: #   ]
 ; ASM: # }
-; ASM: .short	10                      # Record length
-; ASM: .short	4098                    # Record kind: LF_POINTER
-; ASM: .long	4099                    # PointeeType
-; ASM: .long	65548                   # Attributes
+; ASM: .short	0xa                     # Record length
+; ASM: .short	0x1002                  # Record kind: LF_POINTER
+; ASM: .long	0x1003                  # PointeeType
+; ASM: .long	0x1000c                 # Attributes
 ; ASM: # Pointer (0x1004) {
 ; ASM: #   TypeLeafKind: LF_POINTER (0x1002)
 ; ASM: #   PointeeType: const int (0x1003)
@@ -425,14 +425,14 @@
 ; ASM: #   IsThisPtr&&: 0
 ; ASM: #   SizeOf: 8
 ; ASM: # }
-; ASM: .short	22                      # Record length
-; ASM: .short	5381                    # Record kind: LF_STRUCTURE
-; ASM: .short	0                       # MemberCount
-; ASM: .short	128                     # Properties
-; ASM: .long	0                       # FieldList
-; ASM: .long	0                       # DerivedFrom
-; ASM: .long	0                       # VShape
-; ASM: .short	0                       # SizeOf
+; ASM: .short	0x16                    # Record length
+; ASM: .short	0x1505                  # Record kind: LF_STRUCTURE
+; ASM: .short	0x0                     # MemberCount
+; ASM: .short	0x80                    # Properties
+; ASM: .long	0x0                     # FieldList
+; ASM: .long	0x0                     # DerivedFrom
+; ASM: .long	0x0                     # VShape
+; ASM: .short	0x0                     # SizeOf
 ; ASM: .asciz	"A"                     # Name
 ; ASM: # Struct (0x1005) {
 ; ASM: #   TypeLeafKind: LF_STRUCTURE (0x1505)
@@ -446,12 +446,12 @@
 ; ASM: #   SizeOf: 0
 ; ASM: #   Name: A
 ; ASM: # }
-; ASM: .short	18                      # Record length
-; ASM: .short	4098                    # Record kind: LF_POINTER
-; ASM: .long	116                     # PointeeType
-; ASM: .long	32844                   # Attributes
-; ASM: .long	4101                    # ClassType
-; ASM: .short	4                       # Representation
+; ASM: .short	0x12                    # Record length
+; ASM: .short	0x1002                  # Record kind: LF_POINTER
+; ASM: .long	0x74                    # PointeeType
+; ASM: .long	0x804c                  # Attributes
+; ASM: .long	0x1005                  # ClassType
+; ASM: .short	0x4                     # Representation
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # Pointer (0x1006) {
@@ -470,10 +470,10 @@
 ; ASM: #   ClassType: A (0x1005)
 ; ASM: #   Representation: GeneralData (0x4)
 ; ASM: # }
-; ASM: .short	10                      # Record length
-; ASM: .short	4098                    # Record kind: LF_POINTER
-; ASM: .long	4101                    # PointeeType
-; ASM: .long	66572                   # Attributes
+; ASM: .short	0xa                     # Record length
+; ASM: .short	0x1002                  # Record kind: LF_POINTER
+; ASM: .long	0x1005                  # PointeeType
+; ASM: .long	0x1040c                 # Attributes
 ; ASM: # Pointer (0x1007) {
 ; ASM: #   TypeLeafKind: LF_POINTER (0x1002)
 ; ASM: #   PointeeType: A (0x1005)
@@ -488,25 +488,25 @@
 ; ASM: #   IsThisPtr&&: 0
 ; ASM: #   SizeOf: 8
 ; ASM: # }
-; ASM: .short	6                       # Record length
-; ASM: .short	4609                    # Record kind: LF_ARGLIST
-; ASM: .long	0                       # NumArgs
+; ASM: .short	0x6                     # Record length
+; ASM: .short	0x1201                  # Record kind: LF_ARGLIST
+; ASM: .long	0x0                     # NumArgs
 ; ASM: # ArgList (0x1008) {
 ; ASM: #   TypeLeafKind: LF_ARGLIST (0x1201)
 ; ASM: #   NumArgs: 0
 ; ASM: #   Arguments [
 ; ASM: #   ]
 ; ASM: # }
-; ASM: .short	26                      # Record length
-; ASM: .short	4105                    # Record kind: LF_MFUNCTION
-; ASM: .long	3                       # ReturnType
-; ASM: .long	4101                    # ClassType
-; ASM: .long	4103                    # ThisType
-; ASM: .byte	0                       # CallingConvention
-; ASM: .byte	0                       # FunctionOptions
-; ASM: .short	0                       # NumParameters
-; ASM: .long	4104                    # ArgListType
-; ASM: .long	0                       # ThisAdjustment
+; ASM: .short	0x1a                    # Record length
+; ASM: .short	0x1009                  # Record kind: LF_MFUNCTION
+; ASM: .long	0x3                     # ReturnType
+; ASM: .long	0x1005                  # ClassType
+; ASM: .long	0x1007                  # ThisType
+; ASM: .byte	0x0                     # CallingConvention
+; ASM: .byte	0x0                     # FunctionOptions
+; ASM: .short	0x0                     # NumParameters
+; ASM: .long	0x1008                  # ArgListType
+; ASM: .long	0x0                     # ThisAdjustment
 ; ASM: # MemberFunction (0x1009) {
 ; ASM: #   TypeLeafKind: LF_MFUNCTION (0x1009)
 ; ASM: #   ReturnType: void (0x3)
@@ -519,8 +519,8 @@
 ; ASM: #   ArgListType: () (0x1008)
 ; ASM: #   ThisAdjustment: 0
 ; ASM: # }
-; ASM: .short	30                      # Record length
-; ASM: .short	4611                    # Record kind: LF_FIELDLIST
+; ASM: .short	0x1e                    # Record length
+; ASM: .short	0x1203                  # Record kind: LF_FIELDLIST
 ; ASM: .byte	0x0d, 0x15, 0x03, 0x00
 ; ASM: .byte	0x74, 0x00, 0x00, 0x00
 ; ASM: .byte	0x00, 0x00, 0x61, 0x00
@@ -544,14 +544,14 @@
 ; ASM: #     Name: A::f
 ; ASM: #   }
 ; ASM: # }
-; ASM: .short	22                      # Record length
-; ASM: .short	5381                    # Record kind: LF_STRUCTURE
-; ASM: .short	2                       # MemberCount
-; ASM: .short	0                       # Properties
-; ASM: .long	4106                    # FieldList
-; ASM: .long	0                       # DerivedFrom
-; ASM: .long	0                       # VShape
-; ASM: .short	4                       # SizeOf
+; ASM: .short	0x16                    # Record length
+; ASM: .short	0x1505                  # Record kind: LF_STRUCTURE
+; ASM: .short	0x2                     # MemberCount
+; ASM: .short	0x0                     # Properties
+; ASM: .long	0x100a                  # FieldList
+; ASM: .long	0x0                     # DerivedFrom
+; ASM: .long	0x0                     # VShape
+; ASM: .short	0x4                     # SizeOf
 ; ASM: .asciz	"A"                     # Name
 ; ASM: # Struct (0x100B) {
 ; ASM: #   TypeLeafKind: LF_STRUCTURE (0x1505)
@@ -564,32 +564,32 @@
 ; ASM: #   SizeOf: 4
 ; ASM: #   Name: A
 ; ASM: # }
-; ASM: .short	30                      # Record length
-; ASM: .short	5637                    # Record kind: LF_STRING_ID
-; ASM: .long	0                       # Id
+; ASM: .short	0x1e                    # Record length
+; ASM: .short	0x1605                  # Record kind: LF_STRING_ID
+; ASM: .long	0x0                     # Id
 ; ASM: .asciz	"D:\\src\\llvm\\build\\t.cpp" # StringData
 ; ASM: # StringId (0x100C) {
 ; ASM: #   TypeLeafKind: LF_STRING_ID (0x1605)
 ; ASM: #   Id: 0x0
 ; ASM: #   StringData: D:\src\llvm\build\t.cpp
 ; ASM: # }
-; ASM: .short	14                      # Record length
-; ASM: .short	5638                    # Record kind: LF_UDT_SRC_LINE
-; ASM: .long	4107                    # UDT
-; ASM: .long	4108                    # SourceFile
-; ASM: .long	1                       # LineNumber
+; ASM: .short	0xe                     # Record length
+; ASM: .short	0x1606                  # Record kind: LF_UDT_SRC_LINE
+; ASM: .long	0x100b                  # UDT
+; ASM: .long	0x100c                  # SourceFile
+; ASM: .long	0x1                     # LineNumber
 ; ASM: # UdtSourceLine (0x100D) {
 ; ASM: #   TypeLeafKind: LF_UDT_SRC_LINE (0x1606)
 ; ASM: #   UDT: A (0x100B)
 ; ASM: #   SourceFile: D:\src\llvm\build\t.cpp (0x100C)
 ; ASM: #   LineNumber: 1
 ; ASM: # }
-; ASM: .short	18                      # Record length
-; ASM: .short	4098                    # Record kind: LF_POINTER
-; ASM: .long	4105                    # PointeeType
-; ASM: .long	65644                   # Attributes
-; ASM: .long	4101                    # ClassType
-; ASM: .short	8                       # Representation
+; ASM: .short	0x12                    # Record length
+; ASM: .short	0x1002                  # Record kind: LF_POINTER
+; ASM: .long	0x1009                  # PointeeType
+; ASM: .long	0x1006c                 # Attributes
+; ASM: .long	0x1005                  # ClassType
+; ASM: .short	0x8                     # Representation
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # Pointer (0x100E) {
@@ -608,10 +608,10 @@
 ; ASM: #   ClassType: A (0x1005)
 ; ASM: #   Representation: GeneralFunction (0x8)
 ; ASM: # }
-; ASM: .short	10                      # Record length
-; ASM: .short	4097                    # Record kind: LF_MODIFIER
-; ASM: .long	3                       # ModifiedType
-; ASM: .short	1                       # Modifiers
+; ASM: .short	0xa                     # Record length
+; ASM: .short	0x1001                  # Record kind: LF_MODIFIER
+; ASM: .long	0x3                     # ModifiedType
+; ASM: .short	0x1                     # Modifiers
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # Modifier (0x100F) {
@@ -621,10 +621,10 @@
 ; ASM: #     Const (0x1)
 ; ASM: #   ]
 ; ASM: # }
-; ASM: .short	10                      # Record length
-; ASM: .short	4098                    # Record kind: LF_POINTER
-; ASM: .long	4111                    # PointeeType
-; ASM: .long	65548                   # Attributes
+; ASM: .short	0xa                     # Record length
+; ASM: .short	0x1002                  # Record kind: LF_POINTER
+; ASM: .long	0x100f                  # PointeeType
+; ASM: .long	0x1000c                 # Attributes
 ; ASM: # Pointer (0x1010) {
 ; ASM: #   TypeLeafKind: LF_POINTER (0x1002)
 ; ASM: #   PointeeType: const void (0x100F)
@@ -639,13 +639,13 @@
 ; ASM: #   IsThisPtr&&: 0
 ; ASM: #   SizeOf: 8
 ; ASM: # }
-; ASM: .short	14                      # Record length
-; ASM: .short	4104                    # Record kind: LF_PROCEDURE
-; ASM: .long	3                       # ReturnType
-; ASM: .byte	0                       # CallingConvention
-; ASM: .byte	0                       # FunctionOptions
-; ASM: .short	0                       # NumParameters
-; ASM: .long	4104                    # ArgListType
+; ASM: .short	0xe                     # Record length
+; ASM: .short	0x1008                  # Record kind: LF_PROCEDURE
+; ASM: .long	0x3                     # ReturnType
+; ASM: .byte	0x0                     # CallingConvention
+; ASM: .byte	0x0                     # FunctionOptions
+; ASM: .short	0x0                     # NumParameters
+; ASM: .long	0x1008                  # ArgListType
 ; ASM: # Procedure (0x1011) {
 ; ASM: #   TypeLeafKind: LF_PROCEDURE (0x1008)
 ; ASM: #   ReturnType: void (0x3)
@@ -655,10 +655,10 @@
 ; ASM: #   NumParameters: 0
 ; ASM: #   ArgListType: () (0x1008)
 ; ASM: # }
-; ASM: .short	22                      # Record length
-; ASM: .short	5633                    # Record kind: LF_FUNC_ID
-; ASM: .long	0                       # ParentScope
-; ASM: .long	4113                    # FunctionType
+; ASM: .short	0x16                    # Record length
+; ASM: .short	0x1601                  # Record kind: LF_FUNC_ID
+; ASM: .long	0x0                     # ParentScope
+; ASM: .long	0x1011                  # FunctionType
 ; ASM: .asciz	"CharTypes"             # Name
 ; ASM: .byte	242
 ; ASM: .byte	241
@@ -668,9 +668,9 @@
 ; ASM: #   FunctionType: void () (0x1011)
 ; ASM: #   Name: CharTypes
 ; ASM: # }
-; ASM: .short	26                      # Record length
-; ASM: .short	5637                    # Record kind: LF_STRING_ID
-; ASM: .long	0                       # Id
+; ASM: .short	0x1a                    # Record length
+; ASM: .short	0x1605                  # Record kind: LF_STRING_ID
+; ASM: .long	0x0                     # Id
 ; ASM: .asciz	"D:\\src\\llvm\\build"  # StringData
 ; ASM: .byte	242
 ; ASM: .byte	241
@@ -679,9 +679,9 @@
 ; ASM: #   Id: 0x0
 ; ASM: #   StringData: D:\src\llvm\build
 ; ASM: # }
-; ASM: .short	14                      # Record length
-; ASM: .short	5637                    # Record kind: LF_STRING_ID
-; ASM: .long	0                       # Id
+; ASM: .short	0xe                     # Record length
+; ASM: .short	0x1605                  # Record kind: LF_STRING_ID
+; ASM: .long	0x0                     # Id
 ; ASM: .asciz	"t.cpp"                 # StringData
 ; ASM: .byte	242
 ; ASM: .byte	241
@@ -690,14 +690,14 @@
 ; ASM: #   Id: 0x0
 ; ASM: #   StringData: t.cpp
 ; ASM: # }
-; ASM: .short	26                      # Record length
-; ASM: .short	5635                    # Record kind: LF_BUILDINFO
-; ASM: .short	5                       # NumArgs
-; ASM: .long	4115                    # Argument
-; ASM: .long	0                       # Argument
-; ASM: .long	4116                    # Argument
-; ASM: .long	0                       # Argument
-; ASM: .long	0                       # Argument
+; ASM: .short	0x1a                    # Record length
+; ASM: .short	0x1603                  # Record kind: LF_BUILDINFO
+; ASM: .short	0x5                     # NumArgs
+; ASM: .long	0x1013                  # Argument
+; ASM: .long	0x0                     # Argument
+; ASM: .long	0x1014                  # Argument
+; ASM: .long	0x0                     # Argument
+; ASM: .long	0x0                     # Argument
 ; ASM: .byte	242
 ; ASM: .byte	241
 ; ASM: # BuildInfo (0x1015) {

From 68983321cc966018d2d0408f1abe920d332033df Mon Sep 17 00:00:00 2001
From: Csaba Dabis <dabis.csaba98@gmail.com>
Date: Thu, 18 Jul 2019 00:03:55 +0000
Subject: [PATCH 409/451] [analyzer] MallocChecker: Prevent Integer Set Library
 false positives

Summary:
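The Integer Set Library uses retain-count based allocation, which is not
modeled in MallocChecker. To avoid false positives, when 'free()' is called
inside a function whose declaration contains an '__isl_' annotation, the
pointers passed to it are escaped instead of being marked as released.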

Reviewed By: NoQ

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64680

llvm-svn: 366391
---
 .../StaticAnalyzer/Checkers/MallocChecker.cpp | 39 ++++++++++++++++++-
 clang/test/Analysis/retain-count-alloc.cpp    | 37 ++++++++++++++++++
 2 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Analysis/retain-count-alloc.cpp

diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 8d9ab1f9e4834..a79b341890655 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -17,6 +17,7 @@
 #include "clang/AST/ParentMap.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Lex/Lexer.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
@@ -359,6 +360,11 @@ class MallocChecker : public Checker<check::DeadSymbols,
   /// Check if the memory associated with this symbol was released.
   bool isReleased(SymbolRef Sym, CheckerContext &C) const;
 
+  /// See if deallocation happens in a suspicious context. If so, escape the
+  /// pointers that otherwise would have been deallocated and return true.
+  bool suppressDeallocationsInSuspiciousContexts(const CallExpr *CE,
+                                                 CheckerContext &C) const;
+
   bool checkUseAfterFree(SymbolRef Sym, CheckerContext &C, const Stmt *S) const;
 
   void checkUseZeroAllocated(SymbolRef Sym, CheckerContext &C,
@@ -877,6 +883,9 @@ void MallocChecker::checkPostStmt(const CallExpr *CE, CheckerContext &C) const {
       State = ProcessZeroAllocation(C, CE, 0, State);
       State = ProcessZeroAllocation(C, CE, 1, State);
     } else if (FunI == II_free || FunI == II_g_free || FunI == II_kfree) {
+      if (suppressDeallocationsInSuspiciousContexts(CE, C))
+        return;
+
       State = FreeMemAux(C, CE, State, 0, false, ReleasedAllocatedMemory);
     } else if (FunI == II_strdup || FunI == II_win_strdup ||
                FunI == II_wcsdup || FunI == II_win_wcsdup) {
@@ -2532,6 +2541,35 @@ bool MallocChecker::isReleased(SymbolRef Sym, CheckerContext &C) const {
   return (RS && RS->isReleased());
 }
 
+bool MallocChecker::suppressDeallocationsInSuspiciousContexts(
+    const CallExpr *CE, CheckerContext &C) const {
+  if (CE->getNumArgs() == 0)
+    return false;
+
+  StringRef FunctionStr = "";
+  if (const auto *FD = dyn_cast<FunctionDecl>(C.getStackFrame()->getDecl()))
+    if (const Stmt *Body = FD->getBody())
+      if (Body->getBeginLoc().isValid())
+        FunctionStr =
+            Lexer::getSourceText(CharSourceRange::getTokenRange(
+                                     {FD->getBeginLoc(), Body->getBeginLoc()}),
+                                 C.getSourceManager(), C.getLangOpts());
+
+  // We do not model the Integer Set Library's retain-count based allocation.
+  if (!FunctionStr.contains("__isl_"))
+    return false;
+
+  ProgramStateRef State = C.getState();
+
+  for (const Expr *Arg : CE->arguments())
+    if (SymbolRef Sym = C.getSVal(Arg).getAsSymbol())
+      if (const RefState *RS = State->get<RegionState>(Sym))
+        State = State->set<RegionState>(Sym, RefState::getEscaped(RS));
+
+  C.addTransition(State);
+  return true;
+}
+
 bool MallocChecker::checkUseAfterFree(SymbolRef Sym, CheckerContext &C,
                                       const Stmt *S) const {
 
@@ -2833,7 +2871,6 @@ ProgramStateRef MallocChecker::checkPointerEscapeAux(ProgramStateRef State,
     if (const RefState *RS = State->get<RegionState>(sym)) {
       if ((RS->isAllocated() || RS->isAllocatedOfSizeZero()) &&
           CheckRefState(RS)) {
-        State = State->remove<RegionState>(sym);
         State = State->set<RegionState>(sym, RefState::getEscaped(RS));
       }
     }
diff --git a/clang/test/Analysis/retain-count-alloc.cpp b/clang/test/Analysis/retain-count-alloc.cpp
new file mode 100644
index 0000000000000..472cbbf0705e2
--- /dev/null
+++ b/clang/test/Analysis/retain-count-alloc.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_analyze_cc1 \
+// RUN:  -analyzer-checker=core,unix.Malloc \
+// RUN:  -verify %s
+
+// expected-no-diagnostics: We do not model Integer Set Library's retain-count
+//                          based allocation. If any of the parameters has an
+//                          '__isl_' prefixed macro definition we escape every
+//                          of them when we are about to 'free()' something.
+
+#define __isl_take
+#define __isl_keep
+
+struct Object { int Ref; };
+void free(void *);
+
+Object *copyObj(__isl_keep Object *O) {
+  O->Ref++;
+  return O;
+}
+
+void freeObj(__isl_take Object *O) {
+  if (--O->Ref > 0)
+    return;
+
+  free(O); // Here we notice that the parameter contains '__isl_', escape it.
+}
+
+void useAfterFree(__isl_take Object *A) {
+  if (!A)
+    return;
+
+  Object *B = copyObj(A);
+  freeObj(B);
+
+  A->Ref = 13;
+  // no-warning: 'Use of memory after it is freed' was here.
+}

From 0c6ad3d5d19f04f476a7b2d9202dee2e3a6e9f73 Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Thu, 18 Jul 2019 00:21:57 +0000
Subject: [PATCH 410/451] [cmake] Add NATIVE build for cross compiling
 standalone builds

TableGen is a host tool and requires a native variant for every build.
While building as a part of llvm this is trivial and llvm handles it.
However, building standalone means that lldb has to handle this itself.
Add a NATIVE build variant to enable this.

llvm-svn: 366392
---
 lldb/CMakeLists.txt | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt
index 878faaa969843..6b1b67258b3db 100644
--- a/lldb/CMakeLists.txt
+++ b/lldb/CMakeLists.txt
@@ -39,6 +39,20 @@ if (NOT LLDB_DISABLE_PYTHON)
   add_subdirectory(scripts)
 endif ()
 
+if(CMAKE_CROSSCOMPILING AND LLDB_BUILT_STANDALONE)
+  set(LLVM_USE_HOST_TOOLS ON)
+  include(CrossCompile)
+  if (NOT LLDB_PATH_TO_NATIVE_LLVM_BUILD OR
+      NOT LLDB_PATH_TO_NATIVE_CLANG_BUILD)
+    message(FATAL_ERROR
+      "Crosscompiling standalone requires the variables LLDB_PATH_TO_NATIVE_{CLANG,LLVM}_BUILD
+      for building the native lldb-tblgen used during the build process.")
+  endif()
+  llvm_create_cross_target(lldb NATIVE "" Release
+    -DLLDB_PATH_TO_LLVM_BUILD=${LLDB_PATH_TO_NATIVE_LLVM_BUILD}
+    -DLLDB_PATH_TO_CLANG_BUILD=${LLDB_PATH_TO_NATIVE_CLANG_BUILD})
+endif()
+
 add_subdirectory(utils/TableGen)
 add_subdirectory(source)
 add_subdirectory(tools)

From 9cdd2152b92703bfac118f7cc0b3eb934b95cdaf Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 18 Jul 2019 00:26:03 +0000
Subject: [PATCH 411/451] [Tests] Add a test showing how we handle overaligned
 allocas w/ no-realign-stack

(At the moment, we ignore the alignment requirement.)

llvm-svn: 366393
---
 llvm/test/CodeGen/X86/alloca-overaligned.ll | 55 +++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/alloca-overaligned.ll

diff --git a/llvm/test/CodeGen/X86/alloca-overaligned.ll b/llvm/test/CodeGen/X86/alloca-overaligned.ll
new file mode 100644
index 0000000000000..8ac50d8eb82f2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/alloca-overaligned.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=skylake | FileCheck %s
+
+declare void @capture(i64*)
+
+define void @test_natural() "no-realign-stack" {
+; CHECK-LABEL: test_natural:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rsp, %rdi
+; CHECK-NEXT:    callq capture
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %a = alloca i64
+  call void @capture(i64* %a)
+  ret void
+}
+
+define void @test_realign() {
+; CHECK-LABEL: test_realign:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    andq $-64, %rsp
+; CHECK-NEXT:    subq $64, %rsp
+; CHECK-NEXT:    movq %rsp, %rdi
+; CHECK-NEXT:    callq capture
+; CHECK-NEXT:    movq %rbp, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+  %a = alloca i64, align 64
+  call void @capture(i64* %a)
+  ret void
+}
+
+define void @test_norealign() "no-realign-stack" {
+; CHECK-LABEL: test_norealign:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rsp, %rdi
+; CHECK-NEXT:    callq capture
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %a = alloca i64, align 64
+  call void @capture(i64* %a)
+  ret void
+}

From bd3e74c73d968582d36e4d60e389002540becc8c Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Thu, 18 Jul 2019 01:26:53 +0000
Subject: [PATCH 412/451] Only build lldb-tblgen if it's not a current target

Summary:
When doing standalone builds, you could potentially be building against
an llvm which also built lldb. If this were the case, you'd be
attempting to build this target twice.

Reviewers: xiaobai

Subscribers: mgorny

Differential Revision: https://reviews.llvm.org/D64847

llvm-svn: 366394
---
 lldb/utils/TableGen/CMakeLists.txt | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/lldb/utils/TableGen/CMakeLists.txt b/lldb/utils/TableGen/CMakeLists.txt
index 76e819160ea6e..45a5d366222f8 100644
--- a/lldb/utils/TableGen/CMakeLists.txt
+++ b/lldb/utils/TableGen/CMakeLists.txt
@@ -1,8 +1,14 @@
-set(LLVM_LINK_COMPONENTS Support)
-
-add_tablegen(lldb-tblgen LLDB
-  LLDBOptionDefEmitter.cpp
-  LLDBTableGen.cpp
-  )
-set_target_properties(lldb-tblgen PROPERTIES FOLDER "LLDB tablegenning")
+# tablegen targets get exported via llvm for LLVMConfig.cmake. So standalone
+# builds of lldb can potentially import this via LLVMConfig and also attempt to
+# build it in tree. So only build it if it doesn't exist.
+if (TARGET lldb-tblgen)
+  set(LLDB_TABLEGEN_EXE $<TARGET_FILE:lldb-tblgen> CACHE STRING "")
+else()
+  set(LLVM_LINK_COMPONENTS Support)
 
+  add_tablegen(lldb-tblgen LLDB
+    LLDBOptionDefEmitter.cpp
+    LLDBTableGen.cpp
+    )
+  set_target_properties(lldb-tblgen PROPERTIES FOLDER "LLDB tablegenning")
+endif()

From f358cf8de22ded93286992bbe9eeb32d47b2426e Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 18 Jul 2019 01:53:08 +0000
Subject: [PATCH 413/451] [AArch64] Add dependency from AArch64CodeGen to
 TransformUtils to fix -DBUILD_SHARED_LIBS=on link error after D64173/r366361

This fixes:

ld.lld: error: undefined symbol: llvm::findAllocaForValue(llvm::Value*, llvm::DenseMap<llvm::Value*, llvm::AllocaInst*, llvm::DenseMapInfo<llvm::Value*>, llvm::detail::DenseMapPair<llvm::Value*, llvm::AllocaInst*> >&)
>>> referenced by AArch64StackTagging.cpp

llvm-svn: 366396
---
 llvm/lib/Target/AArch64/LLVMBuild.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/LLVMBuild.txt b/llvm/lib/Target/AArch64/LLVMBuild.txt
index d849b246124b5..620d3a857e8ca 100644
--- a/llvm/lib/Target/AArch64/LLVMBuild.txt
+++ b/llvm/lib/Target/AArch64/LLVMBuild.txt
@@ -30,5 +30,5 @@ has_jit = 1
 type = Library
 name = AArch64CodeGen
 parent = AArch64
-required_libraries = AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target GlobalISel
+required_libraries = AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target TransformUtils GlobalISel
 add_to_library_groups = AArch64

From ad73a436dc3bd9a6da0645872b3e9efe95e45e71 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Thu, 18 Jul 2019 03:23:47 +0000
Subject: [PATCH 414/451] [RISCV] Don't access an invalidated iterator in
 RISCVInstrInfo::removeBranch

Issue found by ASan.

llvm-svn: 366397
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2608906bb4308..99c8d2ef73de5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -290,9 +290,9 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
     return 0;
 
   // Remove the branch.
-  I->eraseFromParent();
   if (BytesRemoved)
     *BytesRemoved += getInstSizeInBytes(*I);
+  I->eraseFromParent();
 
   I = MBB.end();
 
@@ -303,9 +303,9 @@ unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
     return 1;
 
   // Remove the branch.
-  I->eraseFromParent();
   if (BytesRemoved)
     *BytesRemoved += getInstSizeInBytes(*I);
+  I->eraseFromParent();
   return 2;
 }
 

From 8aba95d64cf315d45b5d2faf2c809b7bd7f4e8e7 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Thu, 18 Jul 2019 04:02:58 +0000
Subject: [PATCH 415/451] [RISCV] Avoid signed integer overflow UB in
 RISCVMatInt::generateInstSeq

Found by UBSan.

llvm-svn: 366398
---
 llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
index 2504df5ef9bdd..f390ddb89e3c9 100644
--- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp
@@ -64,7 +64,7 @@ void generateInstSeq(int64_t Val, bool IsRV64, InstSeq &Res) {
   // performed when the recursion returns.
 
   int64_t Lo12 = SignExtend64<12>(Val);
-  int64_t Hi52 = (Val + 0x800) >> 12;
+  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
   int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
   Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);
 

From 4e8d07fd7db2bd9d0c4384cb6b87e6bad1b4e120 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Thu, 18 Jul 2019 04:05:18 +0000
Subject: [PATCH 416/451] [RISCV] Re-land r366331: add RISCV to LLVM_ALL_TARGETS

*San-flagged issues should now be addressed.

llvm-svn: 366399
---
 llvm/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index acebd3feb8bf8..b8eb19848bc58 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -285,6 +285,7 @@ set(LLVM_ALL_TARGETS
   MSP430
   NVPTX
   PowerPC
+  RISCV
   Sparc
   SystemZ
   WebAssembly

From ceeda9f07a82c096128e70db1b9469b92d27e1df Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 18 Jul 2019 04:23:54 +0000
Subject: [PATCH 417/451] [clangd] Fix -Wunused-lambda-capture after
 r366339

llvm-svn: 366400
---
 clang-tools-extra/clangd/QueryDriverDatabase.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/QueryDriverDatabase.cpp
index bdb928bde352c..28596dafb601f 100644
--- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp
+++ b/clang-tools-extra/clangd/QueryDriverDatabase.cpp
@@ -58,14 +58,13 @@ namespace {
 
 std::vector<std::string> parseDriverOutput(llvm::StringRef Output) {
   std::vector<std::string> SystemIncludes;
-  constexpr char const *SIS = "#include <...> search starts here:";
+  const char SIS[] = "#include <...> search starts here:";
   constexpr char const *SIE = "End of search list.";
   llvm::SmallVector<llvm::StringRef, 8> Lines;
   Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
 
-  auto StartIt =
-      std::find_if(Lines.begin(), Lines.end(),
-                   [SIS](llvm::StringRef Line) { return Line.trim() == SIS; });
+  auto StartIt = llvm::find_if(
+      Lines, [SIS](llvm::StringRef Line) { return Line.trim() == SIS; });
   if (StartIt == Lines.end()) {
     elog("System include extraction: start marker not found: {0}", Output);
     return {};

From 1d5cbb7557a18ea9e467993264155ce3b538b572 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 18 Jul 2019 04:54:58 +0000
Subject: [PATCH 418/451] [ELF][test] Merge/rename some basic*.s tests

basic64be.s is a big-endian powerpc64 test that just duplicates what
basic-ppc64.s does. Extend basic-ppc64.s to add big-endian tests.
Delete basic64be.s

Rename basic32.s to basic-i386.s

llvm-svn: 366401
---
 lld/test/ELF/{basic32.s => basic-i386.s} |   6 +-
 lld/test/ELF/basic-ppc64.s               |  56 ++++--
 lld/test/ELF/basic64be.s                 | 217 -----------------------
 3 files changed, 40 insertions(+), 239 deletions(-)
 rename lld/test/ELF/{basic32.s => basic-i386.s} (98%)
 delete mode 100644 lld/test/ELF/basic64be.s

diff --git a/lld/test/ELF/basic32.s b/lld/test/ELF/basic-i386.s
similarity index 98%
rename from lld/test/ELF/basic32.s
rename to lld/test/ELF/basic-i386.s
index 059cbf9327b33..1f37e7adc3069 100644
--- a/lld/test/ELF/basic32.s
+++ b/lld/test/ELF/basic-i386.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=i686-unknown-linux %s -o %t
-# RUN: ld.lld %t -o %t2
-# RUN: llvm-readobj --file-headers --sections -l %t2 | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=i686-unknown-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-readobj --file-headers --sections -l %t | FileCheck %s
 
 # exits with return code 42 on linux
 .globl _start
diff --git a/lld/test/ELF/basic-ppc64.s b/lld/test/ELF/basic-ppc64.s
index 3dd54debed0e0..cab130212b0b0 100644
--- a/lld/test/ELF/basic-ppc64.s
+++ b/lld/test/ELF/basic-ppc64.s
@@ -1,7 +1,12 @@
 # REQUIRES: ppc
-# # RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t
-# RUN: ld.lld --hash-style=sysv -discard-all -shared %t -o %t2
-# RUN: llvm-readobj --file-headers --sections --section-data -l %t2 | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
+# RUN: ld.lld --hash-style=sysv -discard-all -shared %t.o -o %t.so
+# RUN: llvm-readobj --file-headers --sections --section-data -l %t.so | FileCheck --check-prefixes=CHECK,LE %s
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
+# RUN: ld.lld --hash-style=sysv -discard-all -shared %t.o -o %t.so
+# RUN: llvm-readobj --file-headers --sections --section-data -l %t.so | FileCheck --check-prefixes=CHECK,BE %s
+
 .abiversion 2
 # Exits with return code 55 on linux.
 .text
@@ -10,14 +15,16 @@
   sc
 
 // CHECK:Format: ELF64-ppc64
-// CHECK-NEXT:Arch: powerpc64le
-// CHECK-NEXT:AddressSize: 64bit
-// CHECK-NEXT:LoadName: 
-// CHECK-NEXT:ElfHeader {
+// LE-NEXT:    Arch: powerpc64le
+// BE-NEXT:    Arch: powerpc64{{$}}
+// CHECK-NEXT: AddressSize: 64bit
+// CHECK-NEXT: LoadName:
+// CHECK-NEXT: ElfHeader {
 // CHECK-NEXT:  Ident {
 // CHECK-NEXT:    Magic: (7F 45 4C 46)
 // CHECK-NEXT:    Class: 64-bit (0x2)
-// CHECK-NEXT:    DataEncoding: LittleEndian (0x1)
+// LE-NEXT:       DataEncoding: LittleEndian (0x1)
+// BE-NEXT:       DataEncoding: BigEndian (0x2)
 // CHECK-NEXT:    FileVersion: 1
 // CHECK-NEXT:    OS/ABI: SystemV (0x0)
 // CHECK-NEXT:    ABIVersion: 0
@@ -90,7 +97,8 @@
 // CHECK-NEXT:    AddressAlignment: 4
 // CHECK-NEXT:    EntrySize: 4
 // CHECK-NEXT:    SectionData (
-// CHECK-NEXT:      0000: 01000000 01000000 00000000 00000000  |................|
+// LE-NEXT:         0000: 01000000 01000000 00000000 00000000
+// BE-NEXT:         0000: 00000001 00000001 00000000 00000000
 // CHECK-NEXT:    )
 // CHECK-NEXT:  }
 // CHECK-NEXT:  Section {
@@ -127,7 +135,8 @@
 // CHECK-NEXT:    AddressAlignment: 4
 // CHECK-NEXT:    EntrySize: 0
 // CHECK-NEXT:    SectionData (
-// CHECK-NEXT:      0000: 01000038 37006038 02000044           |...87.`8...D|
+// LE-NEXT:         0000: 01000038 37006038 02000044
+// BE-NEXT:         0000: 38000001 38600037 44000002
 // CHECK-NEXT:    )
 // CHECK-NEXT:  }
 // CHECK-NEXT:  Section {
@@ -146,12 +155,18 @@
 // CHECK-NEXT:    AddressAlignment: 8
 // CHECK-NEXT:    EntrySize: 16
 // CHECK-NEXT:    SectionData (
-// CHECK-NEXT:      0000: 06000000 00000000 00020000 00000000  |
-// CHECK-NEXT:      0010: 0B000000 00000000 18000000 00000000  |
-// CHECK-NEXT:      0020: 05000000 00000000 28020000 00000000  |
-// CHECK-NEXT:      0030: 0A000000 00000000 01000000 00000000  |
-// CHECK-NEXT:      0040: 04000000 00000000 18020000 00000000  |
-// CHECK-NEXT:      0050: 00000000 00000000 00000000 00000000  |
+// LE-NEXT:         0000: 06000000 00000000 00020000 00000000  |
+// LE-NEXT:         0010: 0B000000 00000000 18000000 00000000  |
+// LE-NEXT:         0020: 05000000 00000000 28020000 00000000  |
+// LE-NEXT:         0030: 0A000000 00000000 01000000 00000000  |
+// LE-NEXT:         0040: 04000000 00000000 18020000 00000000  |
+// LE-NEXT:         0050: 00000000 00000000 00000000 00000000  |
+// BE-NEXT:         0000: 00000000 00000006 00000000 00000200  |
+// BE-NEXT:         0010: 00000000 0000000B 00000000 00000018  |
+// BE-NEXT:         0020: 00000000 00000005 00000000 00000228  |
+// BE-NEXT:         0030: 00000000 0000000A 00000000 00000001  |
+// BE-NEXT:         0040: 00000000 00000004 00000000 00000218  |
+// BE-NEXT:         0050: 00000000 00000000 00000000 00000000  |
 // CHECK-NEXT:    )
 // CHECK-NEXT:  }
 // CHECK-NEXT:  Section {
@@ -203,9 +218,12 @@
 // CHECK-NEXT:    AddressAlignment: 8
 // CHECK-NEXT:    EntrySize: 24
 // CHECK-NEXT:    SectionData (
-// CHECK-NEXT:      0000: 00000000 00000000 00000000 00000000  |................|
-// CHECK-NEXT:      0010: 00000000 00000000 01000000 00020500  |................|
-// CHECK-NEXT:      0020: 00000200 00000000 00000000 00000000  |................|
+// LE-NEXT:         0000: 00000000 00000000 00000000 00000000  |................|
+// LE-NEXT:         0010: 00000000 00000000 01000000 00020500  |................|
+// LE-NEXT:         0020: 00000200 00000000 00000000 00000000  |................|
+// BE-NEXT:         0000: 00000000 00000000 00000000 00000000  |................|
+// BE-NEXT:         0010: 00000000 00000000 00000001 00020005  |................|
+// BE-NEXT:         0020: 00000000 00020000 00000000 00000000  |................|
 // CHECK-NEXT:    )
 // CHECK-NEXT:  }
 // CHECK-NEXT:  Section {
diff --git a/lld/test/ELF/basic64be.s b/lld/test/ELF/basic64be.s
deleted file mode 100644
index fa53406ab6b1c..0000000000000
--- a/lld/test/ELF/basic64be.s
+++ /dev/null
@@ -1,217 +0,0 @@
-# REQUIRES: ppc
-# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t
-# RUN: ld.lld -discard-all %t -o %t2
-# RUN: llvm-readobj --file-headers --sections --section-data -l %t2 | FileCheck %s
-
-# exits with return code 42 on linux
-.text
-	li      0,1
-	li      3,42
-	sc
-# CHECK: ElfHeader {
-# CHECK-NEXT:   Ident {
-# CHECK-NEXT:     Magic: (7F 45 4C 46)
-# CHECK-NEXT:     Class: 64-bit (0x2)
-# CHECK-NEXT:     DataEncoding: BigEndian (0x2)
-# CHECK-NEXT:     FileVersion: 1
-# CHECK-NEXT:     OS/ABI: SystemV (0x0)
-# CHECK-NEXT:     ABIVersion: 0
-# CHECK-NEXT:     Unused: (00 00 00 00 00 00 00)
-# CHECK-NEXT:   }
-# CHECK-NEXT:   Type: Executable (0x2)
-# CHECK-NEXT:   Machine: EM_PPC64 (0x15)
-# CHECK-NEXT:   Version: 1
-# CHECK-NEXT:   Entry: 0x10010000
-# CHECK-NEXT:   ProgramHeaderOffset: 0x40
-# CHECK-NEXT:   SectionHeaderOffset: 0x20058
-# CHECK-NEXT:   Flags [ (0x2)
-# CHECK-NEXT:     0x2
-# CHECK-NEXT:   ]
-# CHECK-NEXT:   HeaderSize: 64
-# CHECK-NEXT:   ProgramHeaderEntrySize: 56
-# CHECK-NEXT:   ProgramHeaderCount: 4
-# CHECK-NEXT:   SectionHeaderEntrySize: 64
-# CHECK-NEXT:   SectionHeaderCount: 7
-# CHECK-NEXT:   StringTableSectionIndex: 5
-# CHECK-NEXT: }
-# CHECK-NEXT: Sections [
-# CHECK-NEXT:   Section {
-# CHECK-NEXT:     Index: 0
-# CHECK-NEXT:     Name:  (0)
-# CHECK-NEXT:     Type: SHT_NULL (0x0)
-# CHECK-NEXT:     Flags [ (0x0)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Address: 0x0
-# CHECK-NEXT:     Offset: 0x0
-# CHECK-NEXT:     Size: 0
-# CHECK-NEXT:     Link: 0
-# CHECK-NEXT:     Info: 0
-# CHECK-NEXT:     AddressAlignment: 0
-# CHECK-NEXT:     EntrySize: 0
-# CHECK-NEXT:     SectionData (
-# CHECK-NEXT:     )
-# CHECK-NEXT:   }
-# CHECK-NEXT:   Section {
-# CHECK-NEXT:     Index: 1
-# CHECK-NEXT:     Name: .text (1)
-# CHECK-NEXT:     Type: SHT_PROGBITS (0x1)
-# CHECK-NEXT:     Flags [ (0x6)
-# CHECK-NEXT:       SHF_ALLOC (0x2)
-# CHECK-NEXT:       SHF_EXECINSTR (0x4)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Address: 0x10010000
-# CHECK-NEXT:     Offset: 0x10000
-# CHECK-NEXT:     Size: 12
-# CHECK-NEXT:     Link: 0
-# CHECK-NEXT:     Info: 0
-# CHECK-NEXT:     AddressAlignment: 4
-# CHECK-NEXT:     EntrySize: 0
-# CHECK-NEXT:     SectionData (
-# CHECK-NEXT:       0000: 38000001 3860002A 44000002           |8...8`.*D...|
-# CHECK-NEXT:     )
-# CHECK-NEXT:   }
-# CHECK-NEXT:   Section {
-# CHECK-NEXT:     Index: 2
-# CHECK-NEXT:     Name: .branch_lt (7)
-# CHECK-NEXT:     Type: SHT_PROGBITS (0x1)
-# CHECK-NEXT:     Flags [ (0x3)
-# CHECK-NEXT:       SHF_ALLOC (0x2)
-# CHECK-NEXT:       SHF_WRITE (0x1)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Address: 0x10020000
-# CHECK-NEXT:     Offset: 0x20000
-# CHECK-NEXT:     Size: 0
-# CHECK-NEXT:     Link: 0
-# CHECK-NEXT:     Info: 0
-# CHECK-NEXT:     AddressAlignment: 8
-# CHECK-NEXT:     EntrySize: 0
-# CHECK-NEXT:     SectionData (
-# CHECK-NEXT:     )
-# CHECK-NEXT:   }
-# CHECK-NEXT:   Section {
-# CHECK-NEXT:     Index: 3
-# CHECK-NEXT:     Name: .comment (18)
-# CHECK-NEXT:     Type: SHT_PROGBITS (0x1)
-# CHECK-NEXT:     Flags [ (0x30)
-# CHECK-NEXT:       SHF_MERGE (0x10)
-# CHECK-NEXT:       SHF_STRINGS (0x20)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Address: 0x0
-# CHECK-NEXT:     Offset: 0x20000
-# CHECK-NEXT:     Size: 8
-# CHECK-NEXT:     Link: 0
-# CHECK-NEXT:     Info: 0
-# CHECK-NEXT:     AddressAlignment: 1
-# CHECK-NEXT:     EntrySize: 1
-# CHECK-NEXT:     SectionData (
-# CHECK-NEXT:       0000: 4C4C4420 312E3000                    |LLD 1.0.|
-# CHECK-NEXT:     )
-# CHECK-NEXT:   }
-# CHECK-NEXT:   Section {
-# CHECK-NEXT:     Index: 4
-# CHECK-NEXT:     Name: .symtab (27)
-# CHECK-NEXT:     Type: SHT_SYMTAB (0x2)
-# CHECK-NEXT:     Flags [ (0x0)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Address: 0x0
-# CHECK-NEXT:     Offset: 0x20008
-# CHECK-NEXT:     Size: 24
-# CHECK-NEXT:     Link: 6
-# CHECK-NEXT:     Info: 1
-# CHECK-NEXT:     AddressAlignment: 8
-# CHECK-NEXT:     EntrySize: 24
-# CHECK-NEXT:     SectionData (
-# CHECK-NEXT:       0000: 00000000 00000000 00000000 00000000  |................|
-# CHECK-NEXT:       0010: 00000000 00000000                    |........|
-# CHECK-NEXT:     )
-# CHECK-NEXT:   }
-# CHECK-NEXT:   Section {
-# CHECK-NEXT:     Index: 5
-# CHECK-NEXT:     Name: .shstrtab (35)
-# CHECK-NEXT:     Type: SHT_STRTAB (0x3)
-# CHECK-NEXT:     Flags [ (0x0)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Address: 0x0
-# CHECK-NEXT:     Offset: 0x20020
-# CHECK-NEXT:     Size: 53
-# CHECK-NEXT:     Link: 0
-# CHECK-NEXT:     Info: 0
-# CHECK-NEXT:     AddressAlignment: 1
-# CHECK-NEXT:     EntrySize: 0
-# CHECK-NEXT:     SectionData (
-# CHECK-NEXT:       0000: 002E7465 7874002E 6272616E 63685F6C  |..text..branch_l|
-# CHECK-NEXT:       0010: 74002E63 6F6D6D65 6E74002E 73796D74  |t..comment..symt|
-# CHECK-NEXT:       0020: 6162002E 73687374 72746162 002E7374  |ab..shstrtab..st|
-# CHECK-NEXT:       0030: 72746162 00                          |rtab.|
-# CHECK-NEXT:     )
-# CHECK-NEXT:   }
-# CHECK-NEXT:   Section {
-# CHECK-NEXT:     Index: 6
-# CHECK-NEXT:     Name: .strtab (45)
-# CHECK-NEXT:     Type: SHT_STRTAB (0x3)
-# CHECK-NEXT:     Flags [ (0x0)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Address: 0x0
-# CHECK-NEXT:     Offset: 0x20055
-# CHECK-NEXT:     Size: 1
-# CHECK-NEXT:     Link: 0
-# CHECK-NEXT:     Info: 0
-# CHECK-NEXT:     AddressAlignment: 1
-# CHECK-NEXT:     EntrySize: 0
-# CHECK-NEXT:     SectionData (
-# CHECK-NEXT:       0000: 00                                   |.|
-# CHECK-NEXT:     )
-# CHECK-NEXT:   }
-# CHECK-NEXT: ]
-# CHECK-NEXT: ProgramHeaders [
-# CHECK-NEXT:   ProgramHeader {
-# CHECK-NEXT:     Type: PT_PHDR (0x6)
-# CHECK-NEXT:     Offset: 0x40
-# CHECK-NEXT:     VirtualAddress: 0x10000040
-# CHECK-NEXT:     PhysicalAddress: 0x10000040
-# CHECK-NEXT:     FileSize: 280
-# CHECK-NEXT:     MemSize: 280
-# CHECK-NEXT:     Flags [ (0x4)
-# CHECK-NEXT:       PF_R (0x4)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Alignment: 8
-# CHECK-NEXT:   }
-# CHECK-NEXT:   ProgramHeader {
-# CHECK-NEXT:     Type: PT_LOAD (0x1)
-# CHECK-NEXT:     Offset: 0x0
-# CHECK-NEXT:     VirtualAddress: 0x10000000
-# CHECK-NEXT:     PhysicalAddress: 0x10000000
-# CHECK-NEXT:     FileSize: 344
-# CHECK-NEXT:     MemSize: 344
-# CHECK-NEXT:     Flags [ (0x4)
-# CHECK-NEXT:       PF_R (0x4)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Alignment: 65536
-# CHECK-NEXT:   }
-# CHECK-NEXT:   ProgramHeader {
-# CHECK-NEXT:     Type: PT_LOAD (0x1)
-# CHECK-NEXT:     Offset: 0x10000
-# CHECK-NEXT:     VirtualAddress: 0x10010000
-# CHECK-NEXT:     PhysicalAddress: 0x10010000
-# CHECK-NEXT:     FileSize: 4096
-# CHECK-NEXT:     MemSize: 4096
-# CHECK-NEXT:     Flags [ (0x5)
-# CHECK-NEXT:       PF_R (0x4)
-# CHECK-NEXT:       PF_X (0x1)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Alignment: 65536
-# CHECK-NEXT:   }
-# CHECK-NEXT:   ProgramHeader {
-# CHECK-NEXT:     Type: PT_GNU_STACK (0x6474E551)
-# CHECK-NEXT:     Offset: 0x0
-# CHECK-NEXT:     VirtualAddress: 0x0
-# CHECK-NEXT:     PhysicalAddress: 0x0
-# CHECK-NEXT:     FileSize: 0
-# CHECK-NEXT:     MemSize: 0
-# CHECK-NEXT:     Flags [ (0x6)
-# CHECK-NEXT:       PF_R (0x4)
-# CHECK-NEXT:       PF_W (0x2)
-# CHECK-NEXT:     ]
-# CHECK-NEXT:     Alignment: 0
-# CHECK-NEXT:   }
-# CHECK-NEXT: ]

From 44deaf7e54ef55a55965f0c5cf41d4436120f75f Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Thu, 18 Jul 2019 05:22:55 +0000
Subject: [PATCH 419/451] [DWARF][RISCV] Add support for RISC-V relocations
 needed for debug info
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When code relaxation is enabled many RISC-V fixups are not resolved but
instead relocations are emitted. This happens even for DWARF debug
sections. Therefore, to properly support the parsing of DWARF debug info
we need to be able to resolve RISC-V relocations. This patch adds:

* Support for RISC-V relocations in RelocationResolver
* DWARF support for two relocations per object file offset
* DWARF changes to support relocations in more DIE fields

The two relocations per offset change is needed because some RISC-V
relocations (used for label differences) come in pairs.

Relocations can also be emitted for DWARF fields where relocations were
not yet evaluated. Adding relocation support for some of these fields is
essential. On the other hand, LLVM currently emits RISC-V relocations
for fixups that could be safely evaluated, since they can never be
affected by code relaxations. This patch also adds relocation support
for the fields affected by those extraneous relocations (the DWARF unit
entry Length, and the DWARF debug line entry TotalLength and
PrologueLength), for testing purposes.

Differential Revision: https://reviews.llvm.org/D62062
Patch by Luís Marques.

llvm-svn: 366402
---
 lld/ELF/DWARF.cpp                             |   3 +-
 .../llvm/DebugInfo/DWARF/DWARFRelocMap.h      |   4 +-
 llvm/lib/DebugInfo/DWARF/DWARFContext.cpp     |  22 +++-
 .../DebugInfo/DWARF/DWARFDataExtractor.cpp    |   5 +-
 llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp   |   7 +-
 llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp   |   2 +-
 llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp   |   4 +-
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp        |   2 +-
 llvm/lib/Object/RelocationResolver.cpp        |  53 +++++++++
 .../DebugInfo/RISCV/dwarf-riscv-relocs.ll     | 105 ++++++++++++++++++
 10 files changed, 194 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll

diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp
index ab06d066b0793..1e4b36f71b540 100644
--- a/lld/ELF/DWARF.cpp
+++ b/lld/ELF/DWARF.cpp
@@ -110,7 +110,8 @@ LLDDwarfObj<ELFT>::findAux(const InputSectionBase &sec, uint64_t pos,
   DataRefImpl d;
   d.p = getAddend<ELFT>(rel);
   return RelocAddrEntry{secIndex, RelocationRef(d, nullptr),
-                        LLDRelocationResolver<RelTy>::resolve, val};
+                        val,      Optional<object::RelocationRef>(),
+                        0,        LLDRelocationResolver<RelTy>::resolve};
 }
 
 template <class ELFT>
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
index cd022e7882c59..3add711943d09 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h
@@ -20,8 +20,10 @@ namespace llvm {
 struct RelocAddrEntry {
   uint64_t SectionIndex;
   object::RelocationRef Reloc;
-  object::RelocationResolver Resolver;
   uint64_t SymbolValue;
+  Optional<object::RelocationRef> Reloc2;
+  uint64_t SymbolValue2;
+  object::RelocationResolver Resolver;
 };
 
 /// In place of applying the relocations to the data we've read from disk we use
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 321e157665a05..5ede9bf596198 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1651,9 +1651,25 @@ class DWARFObjInMemory final : public DWARFObject {
         //
         // TODO Don't store Resolver in every RelocAddrEntry.
         if (Supports && Supports(Reloc.getType())) {
-          Map->try_emplace(Reloc.getOffset(),
-                           RelocAddrEntry{SymInfoOrErr->SectionIndex, Reloc,
-                                          Resolver, SymInfoOrErr->Address});
+          auto I = Map->try_emplace(
+              Reloc.getOffset(),
+              RelocAddrEntry{SymInfoOrErr->SectionIndex, Reloc,
+                             SymInfoOrErr->Address,
+                             Optional<object::RelocationRef>(), 0, Resolver});
+          // If we didn't successfully insert that's because we already had a
+          // relocation for that offset. Store it as a second relocation in the
+          // same RelocAddrEntry instead.
+          if (!I.second) {
+            RelocAddrEntry &entry = I.first->getSecond();
+            if (entry.Reloc2) {
+              ErrorPolicy EP = HandleError(createError(
+                  "At most two relocations per offset are supported"));
+              if (EP == ErrorPolicy::Halt)
+                return;
+            }
+            entry.Reloc2 = Reloc;
+            entry.SymbolValue2 = SymInfoOrErr->Address;
+          }
         } else {
           SmallString<32> Type;
           Reloc.getTypeName(Type);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
index 6f2f992f53e1f..b9adf8cb1d997 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp
@@ -24,7 +24,10 @@ uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint32_t *Off,
     return A;
   if (SecNdx)
     *SecNdx = E->SectionIndex;
-  return E->Resolver(E->Reloc, E->SymbolValue, A);
+  uint64_t R = E->Resolver(E->Reloc, E->SymbolValue, A);
+  if (E->Reloc2)
+    R = E->Resolver(*E->Reloc2, E->SymbolValue2, R);
+  return R;
 }
 
 Optional<uint64_t>
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 3ee5652a0eb22..a1cb1e8582eda 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -300,7 +300,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
   const uint64_t PrologueOffset = *OffsetPtr;
 
   clear();
-  TotalLength = DebugLineData.getU32(OffsetPtr);
+  TotalLength = DebugLineData.getRelocatedValue(4, OffsetPtr);
   if (TotalLength == UINT32_MAX) {
     FormParams.Format = dwarf::DWARF64;
     TotalLength = DebugLineData.getU64(OffsetPtr);
@@ -325,7 +325,8 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData,
     SegSelectorSize = DebugLineData.getU8(OffsetPtr);
   }
 
-  PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength());
+  PrologueLength =
+      DebugLineData.getRelocatedValue(sizeofPrologueLength(), OffsetPtr);
   const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr;
   MinInstLength = DebugLineData.getU8(OffsetPtr);
   if (getVersion() >= 4)
@@ -754,7 +755,7 @@ Error DWARFDebugLine::LineTable::parse(
         // requires the use of DW_LNS_advance_pc. Such assemblers, however,
         // can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
         {
-          uint16_t PCOffset = DebugLineData.getU16(OffsetPtr);
+          uint16_t PCOffset = DebugLineData.getRelocatedValue(2, OffsetPtr);
           State.Row.Address.Address += PCOffset;
           if (OS)
             *OS
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index b3e4c911b5aef..290d35511cdba 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -299,7 +299,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
     case DW_FORM_data8:
     case DW_FORM_ref8:
     case DW_FORM_ref_sup8:
-      Value.uval = Data.getU64(OffsetPtr);
+      Value.uval = Data.getRelocatedValue(8, OffsetPtr);
       break;
     case DW_FORM_data16:
       // Treat this like a 16-byte block.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
index 32bb6d329ae5e..e38e706227da8 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -25,7 +25,7 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
                        "%s table length at offset 0x%" PRIx32,
                        SectionName.data(), *OffsetPtr);
   // TODO: Add support for DWARF64.
-  HeaderData.Length = Data.getU32(OffsetPtr);
+  HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr);
   if (HeaderData.Length == 0xffffffffu)
     return createStringError(errc::not_supported,
                        "DWARF64 is not supported in %s at offset 0x%" PRIx32,
@@ -73,7 +73,7 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
         SectionName.data(), HeaderOffset, HeaderData.OffsetEntryCount);
   Data.setAddressSize(HeaderData.AddrSize);
   for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I)
-    Offsets.push_back(Data.getU32(OffsetPtr));
+    Offsets.push_back(Data.getRelocatedValue(4, OffsetPtr));
   return Error::success();
 }
 
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 94bfc8c148f68..b74acf60c7476 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -241,7 +241,7 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
   IndexEntry = Entry;
   if (!IndexEntry && Index)
     IndexEntry = Index->getFromOffset(*offset_ptr);
-  Length = debug_info.getU32(offset_ptr);
+  Length = debug_info.getRelocatedValue(4, offset_ptr);
   FormParams.Format = DWARF32;
   unsigned SizeOfLength = 4;
   if (Length == 0xffffffff) {
diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp
index 414165c58b6b9..0a243f32e12cb 100644
--- a/llvm/lib/Object/RelocationResolver.cpp
+++ b/llvm/lib/Object/RelocationResolver.cpp
@@ -330,6 +330,55 @@ static uint64_t resolveHexagon(RelocationRef R, uint64_t S, uint64_t A) {
   llvm_unreachable("Invalid relocation type");
 }
 
+static bool supportsRISCV(uint64_t Type) {
+  switch (Type) {
+  case ELF::R_RISCV_NONE:
+  case ELF::R_RISCV_32:
+  case ELF::R_RISCV_64:
+  case ELF::R_RISCV_ADD8:
+  case ELF::R_RISCV_SUB8:
+  case ELF::R_RISCV_ADD16:
+  case ELF::R_RISCV_SUB16:
+  case ELF::R_RISCV_ADD32:
+  case ELF::R_RISCV_SUB32:
+  case ELF::R_RISCV_ADD64:
+  case ELF::R_RISCV_SUB64:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static uint64_t resolveRISCV(RelocationRef R, uint64_t S, uint64_t A) {
+  int64_t RA = getELFAddend(R);
+  switch (R.getType()) {
+  case ELF::R_RISCV_NONE:
+    return A;
+  case ELF::R_RISCV_32:
+    return (S + RA) & 0xFFFFFFFF;
+  case ELF::R_RISCV_64:
+    return S + RA;
+  case ELF::R_RISCV_ADD8:
+    return (A + (S + RA)) & 0xFF;
+  case ELF::R_RISCV_SUB8:
+    return (A - (S + RA)) & 0xFF;
+  case ELF::R_RISCV_ADD16:
+    return (A + (S + RA)) & 0xFFFF;
+  case ELF::R_RISCV_SUB16:
+    return (A - (S + RA)) & 0xFFFF;
+  case ELF::R_RISCV_ADD32:
+    return (A + (S + RA)) & 0xFFFFFFFF;
+  case ELF::R_RISCV_SUB32:
+    return (A - (S + RA)) & 0xFFFFFFFF;
+  case ELF::R_RISCV_ADD64:
+    return (A + (S + RA));
+  case ELF::R_RISCV_SUB64:
+    return (A - (S + RA));
+  default:
+    llvm_unreachable("Invalid relocation type");
+  }
+}
+
 static bool supportsCOFFX86(uint64_t Type) {
   switch (Type) {
   case COFF::IMAGE_REL_I386_SECREL:
@@ -449,6 +498,8 @@ getRelocationResolver(const ObjectFile &Obj) {
         return {supportsSparc64, resolveSparc64};
       case Triple::amdgcn:
         return {supportsAmdgpu, resolveAmdgpu};
+      case Triple::riscv64:
+        return {supportsRISCV, resolveRISCV};
       default:
         return {nullptr, nullptr};
       }
@@ -477,6 +528,8 @@ getRelocationResolver(const ObjectFile &Obj) {
       return {supportsSparc32, resolveSparc32};
     case Triple::hexagon:
       return {supportsHexagon, resolveHexagon};
+    case Triple::riscv32:
+      return {supportsRISCV, resolveRISCV};
     default:
       return {nullptr, nullptr};
     }
diff --git a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
new file mode 100644
index 0000000000000..db6571d0998f5
--- /dev/null
+++ b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
@@ -0,0 +1,105 @@
+; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o %t.o
+; RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=READOBJ-RELOCS %s
+; RUN: llvm-objdump --source %t.o | FileCheck -check-prefix=OBJDUMP-SOURCE %s
+; RUN: llvm-dwarfdump --debug-info --debug-line %t.o | \
+; RUN:     FileCheck -check-prefix=DWARF-DUMP %s
+
+; Check that we actually have relocations, otherwise this is kind of pointless.
+; READOBJ-RELOCS:  Section (8) .rela.debug_info {
+; READOBJ-RELOCS-NEXT:    0x0 R_RISCV_ADD32 - 0x0
+; READOBJ-RELOCS-NEXT:    0x0 R_RISCV_SUB32 - 0x0
+; READOBJ-RELOCS:  Section (11) .rela.debug_addr {
+; READOBJ-RELOCS-NEXT:    0x0 R_RISCV_ADD32 - 0x0
+; READOBJ-RELOCS-NEXT:    0x0 R_RISCV_SUB32 - 0x0
+; READOBJ-RELOCS:  Section (17) .rela.debug_line {
+; READOBJ-RELOCS-NEXT:    0x0 R_RISCV_ADD32 - 0xFFFFFFFC
+; READOBJ-RELOCS-NEXT:    0x0 R_RISCV_SUB32 .Lline_table_start0 0x0
+
+; Check that we can print the source, even with relocations.
+; OBJDUMP-SOURCE: Disassembly of section .text:
+; OBJDUMP-SOURCE-EMPTY:
+; OBJDUMP-SOURCE-NEXT: 00000000 main:
+; OBJDUMP-SOURCE: ; {
+; OBJDUMP-SOURCE: ; return 0;
+
+; Check that we correctly dump the DWARF info, even with relocations.
+; DWARF-DUMP: DW_AT_name        ("dwarf-riscv-relocs.c")
+; DWARF-DUMP: DW_AT_comp_dir    (".")
+; DWARF-DUMP: DW_AT_name      ("main")
+; DWARF-DUMP: DW_AT_decl_file ("./dwarf-riscv-relocs.c")
+; DWARF-DUMP: DW_AT_decl_line (1)
+; DWARF-DUMP: DW_AT_type      (0x00000032 "int")
+; DWARF-DUMP: DW_AT_name      ("int")
+; DWARF-DUMP: DW_AT_encoding  (DW_ATE_signed)
+; DWARF-DUMP: DW_AT_byte_size (0x04)
+
+; DWARF-DUMP: .debug_line contents:
+; DWARF-DUMP-NEXT: debug_line[0x00000000]
+; DWARF-DUMP-NEXT: Line table prologue:
+; DWARF-DUMP-NEXT:     total_length: 0x0000005f
+; DWARF-DUMP-NEXT:          version: 5
+; DWARF-DUMP-NEXT:     address_size: 4
+; DWARF-DUMP-NEXT:  seg_select_size: 0
+; DWARF-DUMP-NEXT:  prologue_length: 0x0000003e
+; DWARF-DUMP-NEXT:  min_inst_length: 1
+; DWARF-DUMP-NEXT: max_ops_per_inst: 1
+; DWARF-DUMP-NEXT:  default_is_stmt: 1
+; DWARF-DUMP-NEXT:        line_base: -5
+; DWARF-DUMP-NEXT:       line_range: 14
+; DWARF-DUMP-NEXT:      opcode_base: 13
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_copy] = 0
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_advance_pc] = 1
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_advance_line] = 1
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_file] = 1
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_column] = 1
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_negate_stmt] = 0
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_basic_block] = 0
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_const_add_pc] = 0
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
+; DWARF-DUMP-NEXT: standard_opcode_lengths[DW_LNS_set_isa] = 1
+; DWARF-DUMP-NEXT: include_directories[  0] = "."
+; DWARF-DUMP-NEXT: file_names[  0]:
+; DWARF-DUMP-NEXT:            name: "dwarf-riscv-relocs.c"
+; DWARF-DUMP-NEXT:       dir_index: 0
+; DWARF-DUMP-NEXT:    md5_checksum: 05ab89f5481bc9f2d037e7886641e919
+; DWARF-DUMP-NEXT:          source: "int main()\n{\n    return 0;\n}\n"
+; DWARF-DUMP-EMPTY:
+; DWARF-DUMP-NEXT: Address            Line   Column File   ISA Discriminator Flags
+; DWARF-DUMP-NEXT: ------------------ ------ ------ ------ --- ------------- -------------
+; DWARF-DUMP-NEXT: 0x0000000000000000      2      0      0   0             0  is_stmt
+; DWARF-DUMP-NEXT: 0x0000000000000014      3      5      0   0             0  is_stmt prologue_end
+; DWARF-DUMP-NEXT: 0x0000000000000028      3      5      0   0             0  is_stmt end_sequence
+
+; ModuleID = 'dwarf-riscv-relocs.c'
+source_filename = "dwarf-riscv-relocs.c"
+target datalayout = "e-m:e-p:32:32-i64:64-n32-S128"
+target triple = "riscv32"
+
+; Function Attrs: noinline nounwind optnone
+define dso_local i32 @main() #0 !dbg !7 {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  ret i32 0, !dbg !11
+}
+
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+relax" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "dwarf-riscv-relocs.c", directory: ".", checksumkind: CSK_MD5, checksum: "05ab89f5481bc9f2d037e7886641e919", source: "int main()\0A{\0A    return 0;\0A}\0A")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 5}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang"}
+!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !8, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DILocation(line: 3, column: 5, scope: !7)

From 4f93b8b56f5982d19b8b55b8c575887c17e15588 Mon Sep 17 00:00:00 2001
From: Nathan Lanza <nathan@lanza.io>
Date: Thu, 18 Jul 2019 05:24:22 +0000
Subject: [PATCH 420/451] Fix typo in programmer's manual cantFile -> cantFail

llvm-svn: 366403
---
 llvm/docs/ProgrammersManual.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index b6d751d7cbe96..a96f8b4b714ca 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -821,7 +821,7 @@ T value:
 
 Like the ExitOnError utility, cantFail simplifies control flow. Their treatment
 of error cases is very different however: Where ExitOnError is guaranteed to
-terminate the program on an error input, cantFile simply asserts that the result
+terminate the program on an error input, cantFail simply asserts that the result
 is success. In debug builds this will result in an assertion failure if an error
 is encountered. In release builds the behavior of cantFail for failure values is
 undefined. As such, care must be taken in the use of cantFail: clients must be

From 8da0402210232ba439493bf197865835fbf0600e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Thu, 18 Jul 2019 06:18:06 +0000
Subject: [PATCH 421/451] [X86] Disable combineConcatVectors for vXi1 vectors.

I'm not convinced the code this calls is properly vetted for
vXi1 vectors. Experimental vector widening legalization testing
for D55251 is now hitting an assertion failure inside
EltsFromConsecutiveLoads. This is occurring from a v2i1 load
having a store size different than its VT size. Hopefully
this commit will keep such issues from happening.

llvm-svn: 366405
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   4 +
 .../X86/avx512vl-intrinsics-upgrade.ll        | 228 +++++++++---------
 2 files changed, 116 insertions(+), 116 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 15d4bde0167e9..0b4bf687e6cfd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43730,6 +43730,10 @@ static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG,
   EVT SrcVT = N->getOperand(0).getValueType();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
+  // Don't do anything for i1 vectors.
+  if (VT.getVectorElementType() == MVT::i1)
+    return SDValue();
+
   if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) {
     SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
     if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG,
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index fa82fe6a9c1c7..8d0aafc561b33 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -8356,19 +8356,19 @@ define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
 ; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
 ; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
-; X86-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
 ; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
-; X86-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
 ; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
-; X86-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
 ; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X86-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
@@ -8385,19 +8385,19 @@ define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
 ; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
 ; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
-; X64-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
 ; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
-; X64-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
 ; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
-; X64-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
@@ -8470,61 +8470,59 @@ define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; X86-LABEL: test_mask_ucmp_q_256:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
-; X86-NEXT:    vpcmpltuq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x01]
-; X86-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02]
-; X86-NEXT:    vpcmpneqq %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd1,0x04]
-; X86-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe1,0x05]
-; X86-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x06]
-; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
-; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X86-NEXT:    kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
+; X86-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
+; X86-NEXT:    vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
+; X86-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
+; X86-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
+; X86-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
+; X86-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
+; X86-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
+; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
+; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X86-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; X86-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X86-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
 ; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
-; X86-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
+; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
+; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
 ; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
-; X86-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X86-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_mask_ucmp_q_256:
 ; X64:       # %bb.0:
-; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
-; X64-NEXT:    vpcmpltuq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x01]
-; X64-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02]
-; X64-NEXT:    vpcmpneqq %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd1,0x04]
-; X64-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe1,0x05]
-; X64-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x06]
-; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
-; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X64-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
+; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
+; X64-NEXT:    vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
+; X64-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
+; X64-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
+; X64-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
+; X64-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
+; X64-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
+; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
 ; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
-; X64-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
+; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
+; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
 ; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
-; X64-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X64-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
@@ -8607,19 +8605,19 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
 ; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
 ; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
-; X86-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
 ; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
-; X86-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
 ; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
-; X86-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
 ; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X86-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_mask_cmp_d_128:
@@ -8635,19 +8633,19 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
 ; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
 ; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
-; X64-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
 ; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
-; X64-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
 ; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
-; X64-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
+; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -8718,60 +8716,58 @@ define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; X86-LABEL: test_mask_ucmp_d_128:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
-; X86-NEXT:    vpcmpltud %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x01]
-; X86-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02]
-; X86-NEXT:    vpcmpneqd %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd1,0x04]
-; X86-NEXT:    vpcmpnltud %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe1,0x05]
-; X86-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x06]
-; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
-; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X86-NEXT:    kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
+; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
+; X86-NEXT:    vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
+; X86-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
+; X86-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
+; X86-NEXT:    vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
+; X86-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
+; X86-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
+; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
+; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X86-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; X86-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X86-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
 ; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
-; X86-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
+; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
+; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
 ; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
-; X86-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X86-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_mask_ucmp_d_128:
 ; X64:       # %bb.0:
-; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
-; X64-NEXT:    vpcmpltud %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x01]
-; X64-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02]
-; X64-NEXT:    vpcmpneqd %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd1,0x04]
-; X64-NEXT:    vpcmpnltud %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe1,0x05]
-; X64-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x06]
-; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
-; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; X64-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
+; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
+; X64-NEXT:    vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
+; X64-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
+; X64-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
+; X64-NEXT:    vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
+; X64-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
+; X64-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
+; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
+; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
-; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
-; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
 ; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
-; X64-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
+; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
+; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
 ; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
-; X64-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; X64-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
 ; X64-NEXT:    retq # encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0

From 03aaef8e726c68e166616764a8166fb7f504cd53 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucf@cn.ibm.com>
Date: Thu, 18 Jul 2019 06:20:12 +0000
Subject: [PATCH 422/451] [PowerPC][Clang] Remove use of malloc in mm_malloc

Remove the dependency on malloc from the implementation of _mm_malloc in the
PowerPC intrinsics, along with its assumption about glibc's malloc alignment.

Reviewed By: Hal Finkel

Differential Revision: https://reviews.llvm.org/D64850

llvm-svn: 366406
---
 clang/lib/Headers/ppc_wrappers/mm_malloc.h |  4 ----
 clang/test/CodeGen/ppc-mm-malloc-le.c      | 24 ++++------------------
 clang/test/CodeGen/ppc-mm-malloc.c         | 24 ++++------------------
 3 files changed, 8 insertions(+), 44 deletions(-)

diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
index 36589194b3e2f..d91d7865c893c 100644
--- a/clang/lib/Headers/ppc_wrappers/mm_malloc.h
+++ b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
@@ -25,12 +25,8 @@ _mm_malloc (size_t size, size_t alignment)
 {
   /* PowerPC64 ELF V2 ABI requires quadword alignment.  */
   size_t vec_align = sizeof (__vector float);
-  /* Linux GLIBC malloc alignment is at least 2 X ptr size.  */
-  size_t malloc_align = (sizeof (void *) + sizeof (void *));
   void *ptr;
 
-  if (alignment == malloc_align && alignment == vec_align)
-    return malloc (size);
   if (alignment < vec_align)
     alignment = vec_align;
   if (posix_memalign (&ptr, alignment, size) == 0)
diff --git a/clang/test/CodeGen/ppc-mm-malloc-le.c b/clang/test/CodeGen/ppc-mm-malloc-le.c
index 7f9c4c9d52448..ba888afcd194f 100644
--- a/clang/test/CodeGen/ppc-mm-malloc-le.c
+++ b/clang/test/CodeGen/ppc-mm-malloc-le.c
@@ -24,30 +24,14 @@ test_mm_malloc() {
 // CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
 // CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
 // CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
 // CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
-// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
-// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG11]]:
-// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
-// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
-// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG16]]:
-// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
-// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
-// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
-// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG12]]:
-// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
-// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
-// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG8]], [[REG9]]
+// CHECK-NEXT: br i1 [[REG10]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
 // CHECK: [[REG23]]:
 // CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
 // CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
-// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
+// CHECK-NEXT: br label %[[REG24:[0-9a-zA-Z_%.]+]]
 // CHECK: [[REG24]]:
 // CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
 // CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
diff --git a/clang/test/CodeGen/ppc-mm-malloc.c b/clang/test/CodeGen/ppc-mm-malloc.c
index 8559a850a89bc..6e8f6807b50bb 100644
--- a/clang/test/CodeGen/ppc-mm-malloc.c
+++ b/clang/test/CodeGen/ppc-mm-malloc.c
@@ -24,30 +24,14 @@ test_mm_malloc() {
 // CHECK: store i64 [[REG1]], i64* [[REG4:[0-9a-zA-Z_%.]+]], align 8
 // CHECK-NEXT: store i64 [[REG2]], i64* [[REG5:[0-9a-zA-Z_%.]+]], align 8
 // CHECK-NEXT: store i64 16, i64* [[REG6:[0-9a-zA-Z_%.]+]], align 8
-// CHECK-NEXT: store i64 16, i64* [[REG7:[0-9a-zA-Z_%.]+]], align 8
 // CHECK-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG7]], align 8
-// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG8]], [[REG9]]
-// CHECK-NEXT: br i1 [[REG10]], label %[[REG11:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG11]]:
-// CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
-// CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = icmp eq i64 [[REG13]], [[REG14]]
-// CHECK-NEXT: br i1 [[REG15]], label %[[REG16:[0-9a-zA-Z_%.]+]], label %[[REG12:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG16]]:
-// CHECK-NEXT: [[REG17:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8
-// CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = call noalias i8* @malloc(i64 [[REG17]])
-// CHECK-NEXT: store i8* [[REG18]], i8** [[REG3]], align 8
-// CHECK-NEXT: br label %[[REG19:[0-9a-zA-Z_%.]+]]
-// CHECK: [[REG12]]:
-// CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
-// CHECK-NEXT: [[REG21:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
-// CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG20]], [[REG21]]
-// CHECK-NEXT: br i1 [[REG22]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
+// CHECK-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
+// CHECK-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = icmp ult i64 [[REG8]], [[REG9]]
+// CHECK-NEXT: br i1 [[REG10]], label %[[REG23:[0-9a-zA-Z_%.]+]], label %[[REG24:[0-9a-zA-Z_%.]+]]
 // CHECK: [[REG23]]:
 // CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG6]], align 8
 // CHECK-NEXT: store i64 [[REG25]], i64* [[REG5]], align 8
-// CHECK-NEXT: br label %[[REG12:[0-9a-zA-Z_%.]+]]4
+// CHECK-NEXT: br label %[[REG24:[0-9a-zA-Z_%.]+]]
 // CHECK: [[REG24]]:
 // CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG5]], align 8
 // CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG4]], align 8

From 33a4336bcd1560f04454912dcc98fceaacbdaf93 Mon Sep 17 00:00:00 2001
From: Kang Zhang <shkzhang@cn.ibm.com>
Date: Thu, 18 Jul 2019 06:56:49 +0000
Subject: [PATCH 423/451] [NFC][PowerPC] Add the test to test the pass
 block-placement

llvm-svn: 366407
---
 .../CodeGen/PowerPC/block-placement-1.mir     | 312 ++++++++++++++++++
 llvm/test/CodeGen/PowerPC/block-placement.mir |   7 +-
 2 files changed, 318 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/block-placement-1.mir

diff --git a/llvm/test/CodeGen/PowerPC/block-placement-1.mir b/llvm/test/CodeGen/PowerPC/block-placement-1.mir
new file mode 100644
index 0000000000000..01967e46da9fa
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/block-placement-1.mir
@@ -0,0 +1,312 @@
+# RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu \
+# RUN:   -run-pass=block-placement -o - %s | FileCheck %s
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.ll"
+  target datalayout = "e-m:e-i64:64-n32:64"
+  
+  @_ZTIl = external constant i8*
+  @_ZTIi = external constant i8*
+  @_ZTIc = external constant i8*
+  
+  define dso_local void @_Z6calleev() local_unnamed_addr {
+  entry:
+    tail call void @__cxa_rethrow()
+    unreachable
+  }
+  
+  declare void @__cxa_rethrow() local_unnamed_addr
+  
+  define dso_local void @_Z14TestSinglePredv() local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  entry:
+    br label %for.body
+  
+  for.cond.cleanup:                                 ; preds = %for.inc
+    ret void
+  
+  for.body:                                         ; preds = %for.inc, %entry
+    %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 10, %entry ]
+    invoke void @__cxa_rethrow()
+            to label %.noexc unwind label %lpad
+  
+  .noexc:                                           ; preds = %for.body
+    unreachable
+  
+  lpad:                                             ; preds = %for.body
+    %0 = landingpad { i8*, i32 }
+            catch i8* bitcast (i8** @_ZTIl to i8*)
+            catch i8* bitcast (i8** @_ZTIi to i8*)
+            catch i8* null
+    %1 = extractvalue { i8*, i32 } %0, 0
+    %2 = extractvalue { i8*, i32 } %0, 1
+    %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIl to i8*))
+    %matches = icmp eq i32 %2, %3
+    br i1 %matches, label %catch4, label %catch.fallthrough
+  
+  catch4:                                           ; preds = %lpad
+    %4 = tail call i8* @__cxa_begin_catch(i8* %1)
+    invoke void @__cxa_rethrow()
+            to label %unreachable unwind label %lpad6
+  
+  catch.fallthrough:                                ; preds = %lpad
+    %5 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+    %matches1 = icmp eq i32 %2, %5
+    %6 = tail call i8* @__cxa_begin_catch(i8* %1)
+    br i1 %matches1, label %catch2, label %catch
+  
+  catch2:                                           ; preds = %catch.fallthrough
+    tail call void @__cxa_end_catch()
+    br label %for.inc
+  
+  catch:                                            ; preds = %catch.fallthrough
+    tail call void @__cxa_end_catch()
+    br label %for.inc
+  
+  lpad6:                                            ; preds = %catch4
+    %7 = landingpad { i8*, i32 }
+            cleanup
+            catch i8* bitcast (i8** @_ZTIc to i8*)
+    %8 = extractvalue { i8*, i32 } %7, 1
+    %9 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIc to i8*))
+    %matches9 = icmp eq i32 %8, %9
+    br i1 %matches9, label %catch10, label %ehcleanup
+  
+  catch10:                                          ; preds = %lpad6
+    %10 = extractvalue { i8*, i32 } %7, 0
+    %11 = tail call i8* @__cxa_begin_catch(i8* %10)
+    tail call void @__cxa_end_catch()
+    tail call void @__cxa_end_catch()
+    br label %for.inc
+  
+  for.inc:                                          ; preds = %catch10, %catch, %catch2
+    %lsr.iv.next = add nsw i32 %lsr.iv, -1
+    %exitcond = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond, label %for.cond.cleanup, label %for.body
+  
+  ehcleanup:                                        ; preds = %lpad6
+    tail call void @__cxa_end_catch()
+    %exn.obj = extractvalue { i8*, i32 } %7, 0
+    call void @_Unwind_Resume(i8* %exn.obj)
+    unreachable
+  
+  unreachable:                                      ; preds = %catch4
+    unreachable
+  }
+  
+  declare i32 @__gxx_personality_v0(...)
+  
+  ; Function Attrs: nounwind readnone
+  declare i32 @llvm.eh.typeid.for(i8*) #0
+  
+  declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr
+  
+  declare void @__cxa_end_catch() local_unnamed_addr
+  
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+  
+  declare void @_Unwind_Resume(i8*)
+  
+  attributes #0 = { nounwind readnone }
+  attributes #1 = { nounwind }
+
+...
+---
+name:            _Z6calleev
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       32
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 32
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    $x0 = MFLR8 implicit $lr8
+    STD killed $x0, 16, $x1
+    $x1 = STDU $x1, -32, $x1
+    CFI_INSTRUCTION def_cfa_offset 32
+    CFI_INSTRUCTION offset $lr8, 16
+    BL8_NOP @__cxa_rethrow, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit-def $r1
+
+...
+---
+name:            _Z14TestSinglePredv
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       64
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 32
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+  - { id: 0, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default, 
+      callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', 
+      debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default, 
+      callee-saved-register: '$x29', callee-saved-restored: true, debug-info-variable: '', 
+      debug-info-expression: '', debug-info-location: '' }
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $x29, $x30
+  
+    $x0 = MFLR8 implicit $lr8
+    CFI_INSTRUCTION def_cfa_offset 64
+    CFI_INSTRUCTION offset $lr8, 16
+    CFI_INSTRUCTION offset $x29, -24
+    CFI_INSTRUCTION offset $x30, -16
+    STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.1)
+    STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.0, align 16)
+    STD killed $x0, 16, $x1
+    $x1 = STDU $x1, -64, $x1
+    renamable $r29 = LI 10
+  
+  bb.1.for.body:
+    successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
+    liveins: $r29
+  
+    EH_LABEL <mcsymbol .Ltmp0>
+    BL8_NOP @__cxa_rethrow, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit-def $r1
+    EH_LABEL <mcsymbol .Ltmp1>
+  
+  bb.2..noexc:
+    successors: 
+  
+  
+  bb.3.lpad (landing-pad):
+    successors: %bb.4(0x00000001), %bb.5(0x7fffffff)
+    liveins: $r29, $x3, $x4
+  
+    EH_LABEL <mcsymbol .Ltmp2>
+    $x30 = OR8 killed $x4, $x4
+    renamable $cr0 = CMPLWI renamable $r30, 3
+    BCC 71, killed renamable $cr0, %bb.5
+  
+  bb.4.catch4:
+    successors: %bb.11(0x7ffff800), %bb.6(0x00000800)
+    liveins: $r29, $x3
+  
+    BL8_NOP @__cxa_begin_catch, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3
+    EH_LABEL <mcsymbol .Ltmp3>
+    BL8_NOP @__cxa_rethrow, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit-def $r1
+    EH_LABEL <mcsymbol .Ltmp4>
+    B %bb.11
+  
+  bb.5.catch.fallthrough:
+    successors: %bb.8(0x80000000)
+    liveins: $r29, $x3, $x30
+  
+    BL8_NOP @__cxa_begin_catch, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def dead $x3
+    renamable $cr0 = CMPLWI killed renamable $r30, 2, implicit $x30
+    B %bb.8
+  
+  bb.6.lpad6 (landing-pad):
+    successors: %bb.7(0x7fffffff), %bb.10(0x00000001)
+    liveins: $r29, $x3, $x4
+  
+    EH_LABEL <mcsymbol .Ltmp5>
+    renamable $cr0 = CMPLWI killed renamable $r4, 4, implicit $x4
+    $x30 = OR8 killed $x3, $x3
+    BCC 70, killed renamable $cr0, %bb.10
+  
+  bb.7.catch10:
+    successors: %bb.8(0x80000000)
+    liveins: $r29, $x30
+  
+    $x3 = OR8 killed $x30, $x30
+    BL8_NOP @__cxa_begin_catch, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit-def $r1, implicit-def dead $x3
+    BL8_NOP @__cxa_end_catch, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit-def $r1
+  
+  bb.8.for.inc:
+    successors: %bb.9(0x04000000), %bb.1(0x7c000000)
+    liveins: $r29
+  
+    BL8_NOP @__cxa_end_catch, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit-def $r1
+    renamable $r29 = nsw ADDI killed renamable $r29, -1
+    renamable $cr0 = CMPLWI renamable $r29, 0
+    BCC 68, killed renamable $cr0, %bb.1
+  
+  bb.9.for.cond.cleanup:
+    $x1 = ADDI8 $x1, 64
+    $x0 = LD 16, $x1
+    MTLR8 killed $x0, implicit-def $lr8
+    $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.0, align 16)
+    $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.1)
+    BLR8 implicit $lr8, implicit $rm
+  
+  bb.10.ehcleanup:
+    successors: 
+    liveins: $x30
+  
+    BL8_NOP @__cxa_end_catch, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit-def $r1
+    $x3 = OR8 killed $x30, $x30
+    BL8_NOP @_Unwind_Resume, csr_svr464_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit-def $r1
+  
+  bb.11.unreachable:
+
+  ; CHECK:      bb.1.for.body:
+  ; CHECK:        successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
+  ; CHECK:        B %bb.2
+
+  ; CHECK:      bb.4.catch4:
+  ; CHECK:        successors: %bb.11(0x7ffff800), %bb.6(0x00000800)
+  ; CHECK:        B %bb.11
+
+  ; CHECK:      bb.2..noexc:
+
+  ; CHECK:      bb.11.unreachable: 
+...
diff --git a/llvm/test/CodeGen/PowerPC/block-placement.mir b/llvm/test/CodeGen/PowerPC/block-placement.mir
index 3c6937cdc4a6d..9406e13b354eb 100644
--- a/llvm/test/CodeGen/PowerPC/block-placement.mir
+++ b/llvm/test/CodeGen/PowerPC/block-placement.mir
@@ -209,7 +209,12 @@ body:             |
     BLR8 implicit $lr8, implicit $rm, implicit killed $x3
 
   ; CHECK:      bb.5.if.else.i:
-  ; CHECK:      B %bb.11
+  ; CHECK:        successors: %bb.11(0x80000000)
+  ; CHECK:        B %bb.11
+
+  ; CHECK:      bb.8.while.body.i (align 4):
+  ; CHECK:        successors: %bb.11(0x04000000), %bb.9(0x7c000000)
+  ; CHECK:        BCC 76, killed renamable $cr0, %bb.11
 
   ; CHECK:      bb.11:
   ; CHECK:        renamable $x3 = LI8 1

From 5dd86ab2dd6b3cf447f24a619b2e651a8d7f20fc Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Thu, 18 Jul 2019 07:12:47 +0000
Subject: [PATCH 424/451] clang-tidy release notes: Split and order changes by
 type

Patch by Eugene Zelenko!

llvm-svn: 366408
---
 clang-tools-extra/docs/ReleaseNotes.rst | 119 ++++++++++++------------
 1 file changed, 61 insertions(+), 58 deletions(-)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index be79ce7dc479a..0f52c53d443c3 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -57,7 +57,7 @@ The improvements are...
 Improvements to clang-query
 ---------------------------
 
-- ...
+The improvements are...
 
 Improvements to clang-rename
 ----------------------------
@@ -109,14 +109,7 @@ Improvements to clang-tidy
 - New :doc:`android-cloexec-pipe2
   <clang-tidy/checks/android-cloexec-pipe2>` check.
 
-  This checks ensures that ``pipe2()`` is called with the O_CLOEXEC flag.
-
-- New :doc:`bugprone-unhandled-self-assignment
-  <clang-tidy/checks/bugprone-unhandled-self-assignment>` check.
-
-  Finds user-defined copy assignment operators which do not protect the code
-  against self-assignment either by checking self-assignment explicitly or
-  using the copy-and-swap or the copy-and-move method.
+  This checks ensures that ``pipe2()`` is called with the ``O_CLOEXEC`` flag.
 
 - New :doc:`bugprone-branch-clone
   <clang-tidy/checks/bugprone-branch-clone>` check.
@@ -131,6 +124,13 @@ Improvements to clang-tidy
   Checks if any calls to POSIX functions (except ``posix_openpt``) expect negative
   return values.
 
+- New :doc:`bugprone-unhandled-self-assignment
+  <clang-tidy/checks/bugprone-unhandled-self-assignment>` check.
+
+  Finds user-defined copy assignment operators which do not protect the code
+  against self-assignment either by checking self-assignment explicitly or
+  using the copy-and-swap or the copy-and-move method.
+
 - New :doc:`fuchsia-default-arguments-calls
   <clang-tidy/checks/fuchsia-default-arguments-calls>` check.
 
@@ -138,13 +138,19 @@ Improvements to clang-tidy
   This was previously done by `fuchsia-default-arguments check`, which has been
   removed.
 
-- New :doc:`fuchsia-default-arguments-calls
-  <clang-tidy/checks/fuchsia-default-arguments-calls>` check.
+- New :doc:`fuchsia-default-arguments-declarations
+  <clang-tidy/checks/fuchsia-default-arguments-declarations>` check.
 
   Warns if a function or method is declared with default parameters.
   This was previously done by `fuchsia-default-arguments check` check, which has
   been removed.
 
+- New :doc:`google-objc-avoid-nsobject-new
+  <clang-tidy/checks/google-objc-avoid-nsobject-new>` check.
+
+  Checks for calls to ``+new`` or overrides of it, which are prohibited by the
+  Google Objective-C style guide.
+
 - New :doc:`google-readability-avoid-underscore-in-googletest-name
   <clang-tidy/checks/google-readability-avoid-underscore-in-googletest-name>`
   check.
@@ -152,11 +158,19 @@ Improvements to clang-tidy
   Checks whether there are underscores in googletest test and test case names in
   test macros, which is prohibited by the Googletest FAQ.
 
-- New :doc:`google-objc-avoid-nsobject-new
-  <clang-tidy/checks/google-objc-avoid-nsobject-new>` check.
+- New :doc:`llvm-prefer-isa-or-dyn-cast-in-conditionals
+  <clang-tidy/checks/llvm-prefer-isa-or-dyn-cast-in-conditionals>` check.
 
-  Checks for calls to ``+new`` or overrides of it, which are prohibited by the
-  Google Objective-C style guide.
+  Looks at conditionals and finds and replaces cases of ``cast<>``,
+  which will assert rather than return a null pointer, and
+  ``dyn_cast<>`` where the return value is not captured. Additionally,
+  finds and replaces cases that match the pattern ``var &&
+  isa<X>(var)``, where ``var`` is evaluated twice.
+
+- New :doc:`modernize-use-trailing-return-type
+  <clang-tidy/checks/modernize-use-trailing-return-type>` check.
+
+  Rewrites function signatures to use a trailing return type.
 
 - New :doc:`objc-super-self <clang-tidy/checks/objc-super-self>` check.
 
@@ -164,6 +178,24 @@ Improvements to clang-tidy
   subclasses of ``NSObject`` and recommends calling a superclass initializer
   instead.
 
+- New :doc:`openmp-exception-escape
+  <clang-tidy/checks/openmp-exception-escape>` check.
+
+  Analyzes OpenMP Structured Blocks and checks that no exception escapes
+  out of the Structured Block it was thrown in.
+
+- New :doc:`openmp-use-default-none
+  <clang-tidy/checks/openmp-use-default-none>` check.
+
+  Finds OpenMP directives that are allowed to contain a ``default`` clause,
+  but either don't specify it or the clause is specified but with the kind
+  other than ``none``, and suggests to use the ``default(none)`` clause.
+
+- New :doc:`readability-convert-member-functions-to-static
+  <clang-tidy/checks/readability-convert-member-functions-to-static>` check.
+
+  Finds non-static member functions that can be made ``static``.
+
 - New alias :doc:`cert-oop54-cpp
   <clang-tidy/checks/cert-oop54-cpp>` to
   :doc:`bugprone-unhandled-self-assignment
@@ -186,70 +218,41 @@ Improvements to clang-tidy
   which greatly reduces warnings related to loops which are unlikely to
   cause an actual functional bug.
 
-- The ‘fuchsia-default-arguments’ check has been removed.
+- Added `UseAssignment` option to :doc:`cppcoreguidelines-pro-type-member-init
+  <clang-tidy/checks/cppcoreguidelines-pro-type-member-init>`
+
+  If set to true, the check will provide fix-its with literal initializers
+  (``int i = 0;``) instead of curly braces (``int i{};``).
+
+- The `fuchsia-default-arguments` check has been removed.
 
   Warnings of function or method calls and declarations with default arguments
   were moved to :doc:`fuchsia-default-arguments-calls
   <clang-tidy/checks/fuchsia-default-arguments-calls>` and
-  :doc:`fuchsia-default-arguments-calls
-  <clang-tidy/checks/fuchsia-default-arguments-calls>` checks respectively.
+  :doc:`fuchsia-default-arguments-declarations
+  <clang-tidy/checks/fuchsia-default-arguments-declarations>` checks
+  respectively.
 
 - The :doc:`google-runtime-int <clang-tidy/checks/google-runtime-int>`
   check has been disabled in Objective-C++.
 
-- The `Acronyms` and `IncludeDefaultAcronyms` options for the
-  :doc:`objc-property-declaration <clang-tidy/checks/objc-property-declaration>`
-  check have been removed.
-
 - The :doc:`modernize-use-override
   <clang-tidy/checks/modernize-use-override>` now supports `OverrideSpelling`
   and `FinalSpelling` options.
 
-- New :doc:`llvm-prefer-isa-or-dyn-cast-in-conditionals
-  <clang-tidy/checks/llvm-prefer-isa-or-dyn-cast-in-conditionals>` check.
-
-  Looks at conditionals and finds and replaces cases of ``cast<>``,
-  which will assert rather than return a null pointer, and
-  ``dyn_cast<>`` where the return value is not captured. Additionally,
-  finds and replaces cases that match the pattern ``var &&
-  isa<X>(var)``, where ``var`` is evaluated twice.
-
-- New :doc:`modernize-use-trailing-return-type
-  <clang-tidy/checks/modernize-use-trailing-return-type>` check.
-
-  Rewrites function signatures to use a trailing return type.
-
 - The :doc:`misc-throw-by-value-catch-by-reference
   <clang-tidy/checks/misc-throw-by-value-catch-by-reference>` now supports
   `WarnOnLargeObject` and `MaxSize` options to warn on any large trivial
   object caught by value.
 
-- Added `UseAssignment` option to :doc:`cppcoreguidelines-pro-type-member-init
-  <clang-tidy/checks/cppcoreguidelines-pro-type-member-init>`
-
-  If set to true, the check will provide fix-its with literal initializers
-  (``int i = 0;``) instead of curly braces (``int i{};``).
-
-- New :doc:`readability-convert-member-functions-to-static
-  <clang-tidy/checks/readability-convert-member-functions-to-static>` check.
-
-  Finds non-static member functions that can be made ``static``.
+- The `Acronyms` and `IncludeDefaultAcronyms` options for the
+  :doc:`objc-property-declaration <clang-tidy/checks/objc-property-declaration>`
+  check have been removed.
 
 Improvements to include-fixer
 -----------------------------
 
-- New :doc:`openmp-exception-escape
-  <clang-tidy/checks/openmp-exception-escape>` check.
-
-  Analyzes OpenMP Structured Blocks and checks that no exception escapes
-  out of the Structured Block it was thrown in.
-
-- New :doc:`openmp-use-default-none
-  <clang-tidy/checks/openmp-use-default-none>` check.
-
-  Finds OpenMP directives that are allowed to contain a ``default`` clause,
-  but either don't specify it or the clause is specified but with the kind
-  other than ``none``, and suggests to use the ``default(none)`` clause.
+The improvements are...
 
 Improvements to clang-include-fixer
 -----------------------------------

From 36082e397dd3861a53f67a0762856acacc5f16df Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Thu, 18 Jul 2019 07:17:49 +0000
Subject: [PATCH 425/451] [CodeComplete] Fix ASTUnit cached completion of
 macros from preamble, broken in r342528

Summary:
The problem is the default LoadExternal with no completer, which happens when
loading global results.

Reviewers: ilya-biryukov, nik

Subscribers: arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64864

llvm-svn: 366409
---
 clang/lib/Sema/SemaCodeComplete.cpp | 6 ++----
 clang/test/Index/complete-macros.c  | 3 +++
 clang/test/Index/complete-macros.h  | 1 +
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index 93a104e3ade5f..e4bbee86e3502 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -8603,8 +8603,7 @@ void Sema::CodeCompletePreprocessorExpression() {
 
   if (!CodeCompleter || CodeCompleter->includeMacros())
     AddMacroResults(PP, Results,
-                    CodeCompleter ? CodeCompleter->loadExternal() : false,
-                    true);
+                    !CodeCompleter || CodeCompleter->loadExternal(), true);
 
   // defined (<macro>)
   Results.EnterNewScope();
@@ -8801,8 +8800,7 @@ void Sema::GatherGlobalCodeCompletions(
 
   if (!CodeCompleter || CodeCompleter->includeMacros())
     AddMacroResults(PP, Builder,
-                    CodeCompleter ? CodeCompleter->loadExternal() : false,
-                    true);
+                    !CodeCompleter || CodeCompleter->loadExternal(), true);
 
   Results.clear();
   Results.insert(Results.end(), Builder.data(),
diff --git a/clang/test/Index/complete-macros.c b/clang/test/Index/complete-macros.c
index 394f93dea0e2b..a0b1f4ae9e13d 100644
--- a/clang/test/Index/complete-macros.c
+++ b/clang/test/Index/complete-macros.c
@@ -28,7 +28,10 @@ void test_variadic() {
 // RUN: c-index-test -code-completion-at=%s:7:1 %s -I%S | FileCheck -check-prefix=CHECK-CC0 %s
 // CHECK-CC0-NOT: FOO
 // RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_COMPLETION_CACHING=1 c-index-test -code-completion-at=%s:7:1 %s -I%S | FileCheck -check-prefix=CHECK-CC1 %s
+// (we had a regression that only occurred when parsing as C++, so check that too)
+// RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_COMPLETION_CACHING=1 c-index-test -code-completion-at=%s:7:1 -x c++ %s -I%S | FileCheck -check-prefix=CHECK-CC1 %s
 // CHECK-CC1: macro definition:{TypedText FOO} (70)
+// CHECK-CC1: macro definition:{TypedText MACRO_IN_HEADER} (70)
 // RUN: c-index-test -code-completion-at=%s:13:13 %s -I%S | FileCheck -check-prefix=CHECK-CC2 %s
 // RUN: c-index-test -code-completion-at=%s:14:8 %s -I%S | FileCheck -check-prefix=CHECK-CC2 %s
 // RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_COMPLETION_CACHING=1 c-index-test -code-completion-at=%s:14:8 %s -I%S | FileCheck -check-prefix=CHECK-CC2 %s
diff --git a/clang/test/Index/complete-macros.h b/clang/test/Index/complete-macros.h
index 70f49e31a1e34..c9ccba2cc0e98 100644
--- a/clang/test/Index/complete-macros.h
+++ b/clang/test/Index/complete-macros.h
@@ -2,5 +2,6 @@
 #define COMPLETE_MACROS_H_GUARD
 
 void in_header(int);
+#define MACRO_IN_HEADER 1
 
 #endif

From dad1bebecd87027ccaf9912f8e2bd0b83ef82be9 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Thu, 18 Jul 2019 07:25:56 +0000
Subject: [PATCH 426/451] [RISCV][DebugInfo] Fix dwarf-riscv-relocs.ll test on
 Windows

Windows sees DW_AT_decl_file (".\dwarf-riscv-relocs.c") while Linux sees
DW_AT_decl_file ("./dwarf-riscv-relocs.c").

This fixes a failure introduced in rL366402.

llvm-svn: 366410
---
 llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
index db6571d0998f5..c37336c36a0cc 100644
--- a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
+++ b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
@@ -26,7 +26,7 @@
 ; DWARF-DUMP: DW_AT_name        ("dwarf-riscv-relocs.c")
 ; DWARF-DUMP: DW_AT_comp_dir    (".")
 ; DWARF-DUMP: DW_AT_name      ("main")
-; DWARF-DUMP: DW_AT_decl_file ("./dwarf-riscv-relocs.c")
+; DWARF-DUMP: DW_AT_decl_file ("{{.*}}dwarf-riscv-relocs.c")
 ; DWARF-DUMP: DW_AT_decl_line (1)
 ; DWARF-DUMP: DW_AT_type      (0x00000032 "int")
 ; DWARF-DUMP: DW_AT_name      ("int")

From 0ffa833d54707c9d8d83f10ab7019a70c18885ca Mon Sep 17 00:00:00 2001
From: Serguei Katkov <serguei.katkov@azul.com>
Date: Thu, 18 Jul 2019 07:36:20 +0000
Subject: [PATCH 427/451] [LoopInfo] Use early return in branch weight update
 functions. NFC.

llvm-svn: 366411
---
 llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp | 59 ++++++++++----------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index deb38df4420f6..005306cf1898e 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -382,23 +382,23 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
 static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
                                 unsigned IterNumber, unsigned AvgIters,
                                 uint64_t &PeeledHeaderWeight) {
+  if (!PeeledHeaderWeight)
+    return;
   // FIXME: Pick a more realistic distribution.
   // Currently the proportion of weight we assign to the fall-through
   // side of the branch drops linearly with the iteration number, and we use
   // a 0.9 fudge factor to make the drop-off less sharp...
-  if (PeeledHeaderWeight) {
-    uint64_t FallThruWeight =
-        PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
-    uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
-    PeeledHeaderWeight -= ExitWeight;
-
-    unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
-    MDBuilder MDB(LatchBR->getContext());
-    MDNode *WeightNode =
-        HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
-                  : MDB.createBranchWeights(FallThruWeight, ExitWeight);
-    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-  }
+  uint64_t FallThruWeight =
+      PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
+  uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
+  PeeledHeaderWeight -= ExitWeight;
+
+  unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+  MDBuilder MDB(LatchBR->getContext());
+  MDNode *WeightNode =
+      HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
+                : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
 }
 
 /// Initialize the weights.
@@ -430,22 +430,23 @@ static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
 static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
                                uint64_t ExitWeight, uint64_t CurHeaderWeight) {
   // Adjust the branch weights on the loop exit.
-  if (ExitWeight) {
-    // The backedge count is the difference of current header weight and
-    // current loop exit weight. If the current header weight is smaller than
-    // the current loop exit weight, we mark the loop backedge weight as 1.
-    uint64_t BackEdgeWeight = 0;
-    if (ExitWeight < CurHeaderWeight)
-      BackEdgeWeight = CurHeaderWeight - ExitWeight;
-    else
-      BackEdgeWeight = 1;
-    MDBuilder MDB(LatchBR->getContext());
-    unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
-    MDNode *WeightNode =
-        HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
-                  : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
-    LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
-  }
+  if (!ExitWeight)
+    return;
+
+  // The backedge count is the difference of current header weight and
+  // current loop exit weight. If the current header weight is smaller than
+  // the current loop exit weight, we mark the loop backedge weight as 1.
+  uint64_t BackEdgeWeight = 0;
+  if (ExitWeight < CurHeaderWeight)
+    BackEdgeWeight = CurHeaderWeight - ExitWeight;
+  else
+    BackEdgeWeight = 1;
+  MDBuilder MDB(LatchBR->getContext());
+  unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1;
+  MDNode *WeightNode =
+      HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+                : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+  LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
 }
 
 /// Clones the body of the loop L, putting it between \p InsertTop and \p

From b8d352a08bc6530a9de442af8f55649199481f1b Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Date: Thu, 18 Jul 2019 07:52:41 +0000
Subject: [PATCH 428/451] [RISCV] Reset NoPHIS MachineFunctionProperty in
 emitSelectPseudo

We inserted PHIs where there were none before, so the property must be
reset. This error was found on an EXPENSIVE_CHECKS build.

llvm-svn: 366412
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5d8a2b0a65009..ce7b85911ab65 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1308,6 +1308,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
     SelectMBBI = Next;
   }
 
+  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
   return TailMBB;
 }
 

From 63719119c78ca965b5d80e5c20fcfe81ba28f896 Mon Sep 17 00:00:00 2001
From: Serge Guelton <sguelton@redhat.com>
Date: Thu, 18 Jul 2019 08:09:31 +0000
Subject: [PATCH 429/451] Fix asan infinite loop on undefined symbol

Fix llvm#39641

Differential Revision: https://reviews.llvm.org/D63877

llvm-svn: 366413
---
 .../lib/interception/interception_linux.cc    |  9 +++-
 .../asan/TestCases/Linux/dlopen-mixed-c-cxx.c | 42 +++++++++++++++++++
 2 files changed, 49 insertions(+), 2 deletions(-)
 create mode 100644 compiler-rt/test/asan/TestCases/Linux/dlopen-mixed-c-cxx.c

diff --git a/compiler-rt/lib/interception/interception_linux.cc b/compiler-rt/lib/interception/interception_linux.cc
index d07f060b5b642..4b27102a159c8 100644
--- a/compiler-rt/lib/interception/interception_linux.cc
+++ b/compiler-rt/lib/interception/interception_linux.cc
@@ -33,7 +33,7 @@ static int StrCmp(const char *s1, const char *s2) {
 }
 #endif
 
-static void *GetFuncAddr(const char *name) {
+static void *GetFuncAddr(const char *name, uptr wrapper_addr) {
 #if SANITIZER_NETBSD
   // FIXME: Find a better way to handle renames
   if (StrCmp(name, "sigaction"))
@@ -47,13 +47,18 @@ static void *GetFuncAddr(const char *name) {
     // want the address of the real definition, though, so look it up using
     // RTLD_DEFAULT.
     addr = dlsym(RTLD_DEFAULT, name);
+
+    // In case `name' is not loaded, dlsym ends up finding the actual wrapper.
+    // We don't want to intercept the wrapper and have it point to itself.
+    if ((uptr)addr == wrapper_addr)
+      addr = nullptr;
   }
   return addr;
 }
 
 bool InterceptFunction(const char *name, uptr *ptr_to_real, uptr func,
                        uptr wrapper) {
-  void *addr = GetFuncAddr(name);
+  void *addr = GetFuncAddr(name, wrapper);
   *ptr_to_real = (uptr)addr;
   return addr && (func == wrapper);
 }
diff --git a/compiler-rt/test/asan/TestCases/Linux/dlopen-mixed-c-cxx.c b/compiler-rt/test/asan/TestCases/Linux/dlopen-mixed-c-cxx.c
new file mode 100644
index 0000000000000..8bce907ef2e6a
--- /dev/null
+++ b/compiler-rt/test/asan/TestCases/Linux/dlopen-mixed-c-cxx.c
@@ -0,0 +1,42 @@
+// RUN: %clangxx_asan -xc++ -shared -fPIC -o %t.so - < %s
+// RUN: %clang_asan %s -o %t.out -ldl
+// RUN: ASAN_OPTIONS=verbosity=1 not %t.out %t.so 2>&1 | FileCheck %s
+//
+// CHECK: AddressSanitizer: failed to intercept '__cxa_throw'
+//
+// dlopen() can not be intercepted on Android
+// UNSUPPORTED: android
+#ifdef __cplusplus
+
+static void foo(void) {
+  int i = 0;
+  throw(i);
+}
+
+extern "C" {
+int bar(void);
+};
+int bar(void) {
+  try {
+    foo();
+  } catch (int i) {
+    return i;
+  }
+  return -1;
+}
+
+#else
+
+#include <assert.h>
+#include <dlfcn.h>
+
+int main(int argc, char **argv) {
+  int (*bar)(void);
+  void *handle = dlopen(argv[1], RTLD_LAZY);
+  assert(handle);
+  bar = dlsym(handle, "bar");
+  assert(bar);
+  return bar();
+}
+
+#endif

From ce8df1f41d5dae41fbb7af3aafca0935728b8266 Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Thu, 18 Jul 2019 08:22:11 +0000
Subject: [PATCH 430/451] [lldb] Don't double emit option groups

We currently emit the option groups twice if Groups<[1,2,3]> is
used in the tablegen. This leads to compilation errors. This
patch just removes the line that accidentally emits the option
group a second time.

llvm-svn: 366414
---
 lldb/utils/TableGen/LLDBOptionDefEmitter.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lldb/utils/TableGen/LLDBOptionDefEmitter.cpp b/lldb/utils/TableGen/LLDBOptionDefEmitter.cpp
index 00b44c020a201..4e62197d3989f 100644
--- a/lldb/utils/TableGen/LLDBOptionDefEmitter.cpp
+++ b/lldb/utils/TableGen/LLDBOptionDefEmitter.cpp
@@ -44,7 +44,6 @@ static void emitOption(Record *Option, raw_ostream &OS) {
     auto Groups = Option->getValueAsListOfInts("Groups");
     for (int Group : Groups)
       GroupsArg.push_back("LLDB_OPT_SET_" + std::to_string(Group));
-    OS << llvm::join(GroupsArg.begin(), GroupsArg.end(), " | ");
   } else if (Option->getValue("GroupStart")) {
     // The user specified a range of groups (with potentially only one element).
     int GroupStart = Option->getValueAsInt("GroupStart");

From beadf7d0ae10ede0689f43d8d3e5617e62f58faf Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Thu, 18 Jul 2019 08:22:19 +0000
Subject: [PATCH 431/451] [lldb][NFC] Tablegenify type commands

llvm-svn: 366415
---
 lldb/source/Commands/CommandObjectType.cpp | 90 ++++++----------------
 lldb/source/Commands/Options.td            | 79 +++++++++++++++++++
 2 files changed, 101 insertions(+), 68 deletions(-)

diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp
index 650a8dd216ff6..98a43f50b1b1f 100644
--- a/lldb/source/Commands/CommandObjectType.cpp
+++ b/lldb/source/Commands/CommandObjectType.cpp
@@ -96,23 +96,8 @@ static bool WarnOnPotentialUnquotedUnsignedType(Args &command,
 }
 
 static constexpr OptionDefinition g_type_summary_add_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL,                false, "category",        'w', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeName,           "Add this to the given category instead of the default one." },
-  { LLDB_OPT_SET_ALL,                false, "cascade",         'C', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeBoolean,        "If true, cascade through typedef chains." },
-  { LLDB_OPT_SET_ALL,                false, "no-value",        'v', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Don't show the value, just show the summary, for this type." },
-  { LLDB_OPT_SET_ALL,                false, "skip-pointers",   'p', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Don't use this format for pointers-to-type objects." },
-  { LLDB_OPT_SET_ALL,                false, "skip-references", 'r', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Don't use this format for references-to-type objects." },
-  { LLDB_OPT_SET_ALL,                false, "regex",           'x', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Type names are actually regular expressions." },
-  { LLDB_OPT_SET_1,                  true,  "inline-children", 'c', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "If true, inline all child values into summary string." },
-  { LLDB_OPT_SET_1,                  false, "omit-names",      'O', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "If true, omit value names in the summary display." },
-  { LLDB_OPT_SET_2,                  true,  "summary-string",  's', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeSummaryString,  "Summary string used to display text and object contents." },
-  { LLDB_OPT_SET_3,                  false, "python-script",   'o', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypePythonScript,   "Give a one-liner Python script as part of the command." },
-  { LLDB_OPT_SET_3,                  false, "python-function", 'F', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypePythonFunction, "Give the name of a Python function to use for this type." },
-  { LLDB_OPT_SET_3,                  false, "input-python",    'P', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Input Python code to use for this type manually." },
-  { LLDB_OPT_SET_2 | LLDB_OPT_SET_3, false, "expand",          'e', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Expand aggregate data types to show children on separate lines." },
-  { LLDB_OPT_SET_2 | LLDB_OPT_SET_3, false, "hide-empty",      'h', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Do not expand aggregate data types with no children." },
-  { LLDB_OPT_SET_2 | LLDB_OPT_SET_3, false, "name",            'n', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeName,           "A name for this summary string." }
-    // clang-format on
+#define LLDB_OPTIONS_type_summary_add
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeSummaryAdd : public CommandObjectParsed,
@@ -298,15 +283,8 @@ static const char *g_synth_addreader_instructions =
     "class synthProvider:\n";
 
 static constexpr OptionDefinition g_type_synth_add_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "cascade",         'C', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeBoolean,     "If true, cascade through typedef chains." },
-  { LLDB_OPT_SET_ALL, false, "skip-pointers",   'p', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,        "Don't use this format for pointers-to-type objects." },
-  { LLDB_OPT_SET_ALL, false, "skip-references", 'r', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,        "Don't use this format for references-to-type objects." },
-  { LLDB_OPT_SET_ALL, false, "category",        'w', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeName,        "Add this to the given category instead of the default one." },
-  { LLDB_OPT_SET_2,   false, "python-class",    'l', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypePythonClass, "Use this Python class to produce synthetic children." },
-  { LLDB_OPT_SET_3,   false, "input-python",    'P', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,        "Type Python code to generate a class that provides synthetic children." },
-  { LLDB_OPT_SET_ALL, false, "regex",           'x', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,        "Type names are actually regular expressions." }
-    // clang-format on
+#define LLDB_OPTIONS_type_synth_add
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeSynthAdd : public CommandObjectParsed,
@@ -526,14 +504,8 @@ class CommandObjectTypeSynthAdd : public CommandObjectParsed,
 // CommandObjectTypeFormatAdd
 
 static constexpr OptionDefinition g_type_format_add_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "category",        'w', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeName,    "Add this to the given category instead of the default one." },
-  { LLDB_OPT_SET_ALL, false, "cascade",         'C', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeBoolean, "If true, cascade through typedef chains." },
-  { LLDB_OPT_SET_ALL, false, "skip-pointers",   'p', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,    "Don't use this format for pointers-to-type objects." },
-  { LLDB_OPT_SET_ALL, false, "skip-references", 'r', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,    "Don't use this format for references-to-type objects." },
-  { LLDB_OPT_SET_ALL, false, "regex",           'x', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,    "Type names are actually regular expressions." },
-  { LLDB_OPT_SET_2,   false, "type",            't', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeName,    "Format variables as if they were of this type." }
-    // clang-format on
+#define LLDB_OPTIONS_type_format_add
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeFormatAdd : public CommandObjectParsed {
@@ -749,11 +721,8 @@ pointers to floats.  Nor will it change the default display for Afloat and Bfloa
 };
 
 static constexpr OptionDefinition g_type_formatter_delete_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_1, false, "all",      'a', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,     "Delete from every category." },
-  { LLDB_OPT_SET_2, false, "category", 'w', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeName,     "Delete from given category." },
-  { LLDB_OPT_SET_3, false, "language", 'l', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeLanguage, "Delete from given language's category." }
-    // clang-format on
+#define LLDB_OPTIONS_type_formatter_delete
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeFormatterDelete : public CommandObjectParsed {
@@ -891,9 +860,8 @@ class CommandObjectTypeFormatterDelete : public CommandObjectParsed {
 };
 
 static constexpr OptionDefinition g_type_formatter_clear_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "all", 'a', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Clear every category." }
-    // clang-format on
+#define LLDB_OPTIONS_type_formatter_clear
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeFormatterClear : public CommandObjectParsed {
@@ -1005,10 +973,8 @@ class CommandObjectTypeFormatClear : public CommandObjectTypeFormatterClear {
 
 
 static constexpr OptionDefinition g_type_formatter_list_options[] = {
-  // clang-format off
-  {LLDB_OPT_SET_1, false, "category-regex", 'w', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeName,     "Only show categories matching this filter."},
-  {LLDB_OPT_SET_2, false, "language",       'l', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeLanguage, "Only show the category for a specific language."}
-  // clang-format on
+#define LLDB_OPTIONS_type_formatter_list
+#include "CommandOptions.inc"
 };
 
 template <typename FormatterType>
@@ -1767,10 +1733,8 @@ class CommandObjectTypeSummaryList
 // CommandObjectTypeCategoryDefine
 
 static constexpr OptionDefinition g_type_category_define_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "enabled",  'e', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,     "If specified, this category will be created enabled." },
-  { LLDB_OPT_SET_ALL, false, "language", 'l', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeLanguage, "Specify the language that this category is supported for." }
-    // clang-format on
+#define LLDB_OPTIONS_type_category_define
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeCategoryDefine : public CommandObjectParsed {
@@ -1872,9 +1836,8 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed {
 // CommandObjectTypeCategoryEnable
 
 static constexpr OptionDefinition g_type_category_enable_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "language", 'l', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeLanguage, "Enable the category for this language." },
-    // clang-format on
+#define LLDB_OPTIONS_type_category_enable
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeCategoryEnable : public CommandObjectParsed {
@@ -2045,9 +2008,8 @@ class CommandObjectTypeCategoryDelete : public CommandObjectParsed {
 // CommandObjectTypeCategoryDisable
 
 OptionDefinition constexpr g_type_category_disable_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "language", 'l', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeLanguage, "Enable the category for this language." }
-    // clang-format on
+#define LLDB_OPTIONS_type_category_disable
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeCategoryDisable : public CommandObjectParsed {
@@ -2455,14 +2417,8 @@ bool CommandObjectTypeSynthAdd::AddSynth(ConstString type_name,
 #endif // LLDB_DISABLE_PYTHON
 
 static constexpr OptionDefinition g_type_filter_add_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "cascade",         'C', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeBoolean,        "If true, cascade through typedef chains." },
-  { LLDB_OPT_SET_ALL, false, "skip-pointers",   'p', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Don't use this format for pointers-to-type objects." },
-  { LLDB_OPT_SET_ALL, false, "skip-references", 'r', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Don't use this format for references-to-type objects." },
-  { LLDB_OPT_SET_ALL, false, "category",        'w', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeName,           "Add this to the given category instead of the default one." },
-  { LLDB_OPT_SET_ALL, false, "child",           'c', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeExpressionPath, "Include this expression path in the synthetic view." },
-  { LLDB_OPT_SET_ALL, false, "regex",           'x', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,           "Type names are actually regular expressions." }
-    // clang-format on
+#define LLDB_OPTIONS_type_filter_add
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeFilterAdd : public CommandObjectParsed {
@@ -2708,10 +2664,8 @@ all children of my_foo as if no filter was defined:"
 
 // "type lookup"
 static constexpr OptionDefinition g_type_lookup_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "show-help", 'h', OptionParser::eNoArgument,       nullptr, {}, 0, eArgTypeNone,     "Display available help for types" },
-  { LLDB_OPT_SET_ALL, false, "language",  'l', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeLanguage, "Which language's types should the search scope be" }
-    // clang-format on
+#define LLDB_OPTIONS_type_lookup
+#include "CommandOptions.inc"
 };
 
 class CommandObjectTypeLookup : public CommandObjectRaw {
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 1d1bbbf7b7041..da0c5ba9789bb 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -52,6 +52,85 @@ let Command = "breakpoint list" in {
     "provided, which prime new targets.">;
 }
 
+let Command = "type summary add" in {
+  def type_summary_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
+  def type_summary_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;
+  def type_summary_add_no_value : Option<"no-value", "v">, Desc<"Don't show the value, just show the summary, for this type.">;
+  def type_summary_add_skip_pointers : Option<"skip-pointers", "p">, Desc<"Don't use this format for pointers-to-type objects.">;
+  def type_summary_add_skip_references : Option<"skip-references", "r">, Desc<"Don't use this format for references-to-type objects.">;
+  def type_summary_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">;
+  def type_summary_add_inline_children : Option<"inline-children", "c">, Group<1>, Required, Desc<"If true, inline all child values into summary string.">;
+  def type_summary_add_omit_names : Option<"omit-names", "O">, Group<1>, Desc<"If true, omit value names in the summary display.">;
+  def type_summary_add_summary_string : Option<"summary-string", "s">, Group<2>, Arg<"SummaryString">, Required, Desc<"Summary string used to display text and object contents.">;
+  def type_summary_add_python_script : Option<"python-script", "o">, Group<3>, Arg<"PythonScript">, Desc<"Give a one-liner Python script as part of the command.">;
+  def type_summary_add_python_function : Option<"python-function", "F">, Group<3>, Arg<"PythonFunction">, Desc<"Give the name of a Python function to use for this type.">;
+  def type_summary_add_input_python : Option<"input-python", "P">, Group<3>, Desc<"Input Python code to use for this type manually.">;
+  def type_summary_add_expand : Option<"expand", "e">, Groups<[2,3]>, Desc<"Expand aggregate data types to show children on separate lines.">;
+  def type_summary_add_hide_empty : Option<"hide-empty", "h">, Groups<[2,3]>, Desc<"Do not expand aggregate data types with no children.">;
+  def type_summary_add_name : Option<"name", "n">, Groups<[2,3]>, Arg<"Name">, Desc<"A name for this summary string.">;
+}
+
+let Command = "type synth add" in {
+  def type_synth_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;
+  def type_synth_add_skip_pointers : Option<"skip-pointers", "p">, Desc<"Don't use this format for pointers-to-type objects.">;
+  def type_synth_add_skip_references : Option<"skip-references", "r">, Desc<"Don't use this format for references-to-type objects.">;
+  def type_synth_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
+  def type_synth_add_python_class : Option<"python-class", "l">, Group<2>, Arg<"PythonClass">, Desc<"Use this Python class to produce synthetic children.">;
+  def type_synth_add_input_python : Option<"input-python", "P">, Group<3>, Desc<"Type Python code to generate a class that provides synthetic children.">;
+  def type_synth_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">;
+}
+
+let Command = "type format add" in {
+  def type_format_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
+  def type_format_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;
+  def type_format_add_skip_pointers : Option<"skip-pointers", "p">, Desc<"Don't use this format for pointers-to-type objects.">;
+  def type_format_add_skip_references : Option<"skip-references", "r">, Desc<"Don't use this format for references-to-type objects.">;
+  def type_format_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">;
+  def type_format_add_type : Option<"type", "t">, Group<2>, Arg<"Name">, Desc<"Format variables as if they were of this type.">;
+}
+
+let Command = "type formatter delete" in {
+  def type_formatter_delete_all : Option<"all", "a">, Group<1>, Desc<"Delete from every category.">;
+  def type_formatter_delete_category : Option<"category", "w">, Group<2>, Arg<"Name">, Desc<"Delete from given category.">;
+  def type_formatter_delete_language : Option<"language", "l">, Group<3>, Arg<"Language">, Desc<"Delete from given language's category.">;
+}
+
+let Command = "type formatter clear" in {
+  def type_formatter_clear_all : Option<"all", "a">, Desc<"Clear every category.">;
+}
+
+let Command = "type formatter list" in {
+  def type_formatter_list_category_regex : Option<"category-regex", "w">, Group<1>, Arg<"Name">, Desc<"Only show categories matching this filter.">;
+  def type_formatter_list_language : Option<"language", "l">, Group<2>, Arg<"Language">, Desc<"Only show the category for a specific language.">;
+}
+
+let Command = "type category define" in {
+  def type_category_define_enabled : Option<"enabled", "e">, Desc<"If specified, this category will be created enabled.">;
+  def type_category_define_language : Option<"language", "l">, Arg<"Language">, Desc<"Specify the language that this category is supported for.">;
+}
+
+let Command = "type category enable" in {
+  def type_category_enable_language : Option<"language", "l">, Arg<"Language">, Desc<"Enable the category for this language.">;
+}
+
+let Command = "type category disable" in {
+  def type_category_disable_language : Option<"language", "l">, Arg<"Language">, Desc<"Enable the category for this language.">;
+}
+
+let Command = "type filter add" in {
+  def type_filter_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;
+  def type_filter_add_skip_pointers : Option<"skip-pointers", "p">, Desc<"Don't use this format for pointers-to-type objects.">;
+  def type_filter_add_skip_references : Option<"skip-references", "r">, Desc<"Don't use this format for references-to-type objects.">;
+  def type_filter_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
+  def type_filter_add_child : Option<"child", "c">, Arg<"ExpressionPath">, Desc<"Include this expression path in the synthetic view.">;
+  def type_filter_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">;
+}
+
+let Command = "type lookup" in {
+  def type_lookup_show_help : Option<"show-help", "h">, Desc<"Display available help for types">;
+  def type_lookup_language : Option<"language", "l">, Arg<"Language">, Desc<"Which language's types should the search scope be">;
+}
+
 let Command = "watchpoint list" in {
   def watchpoint_list_brief : Option<"brief", "b">, Group<1>, Desc<"Give a "
     "brief description of the watchpoint (no location info).">;

From f26706fa1c15090774868fd9d259c7882e553319 Mon Sep 17 00:00:00 2001
From: Diana Picus <diana.picus@linaro.org>
Date: Thu, 18 Jul 2019 08:27:44 +0000
Subject: [PATCH 432/451] Fixup r366333 (require x86 in test)

Seems to be required for the other added tests too.

llvm-svn: 366416
---
 lld/test/ELF/vs-diagnostics-versionscript.s | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lld/test/ELF/vs-diagnostics-versionscript.s b/lld/test/ELF/vs-diagnostics-versionscript.s
index 2d0be7fc01b90..2f12ae659b1bd 100644
--- a/lld/test/ELF/vs-diagnostics-versionscript.s
+++ b/lld/test/ELF/vs-diagnostics-versionscript.s
@@ -1,3 +1,4 @@
+# REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 # RUN: rm -f %/terr1.script
 # RUN: echo  "\"" > %/terr1.script

From 36d9e8358aa2e24abc280af4811eac5d5c65557f Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Thu, 18 Jul 2019 09:12:49 +0000
Subject: [PATCH 433/451] [OpenCL][PR42033] Fix addr space deduction with
 template parameters

If dependent types appear in pointers or references we allow addr
space deduction because the addr space in template argument will
belong to the pointee and not the pointer or reference itself.

We also don't diagnose addr space on a function return type after
template instantiation. If any addr space for the return type was
provided on a template parameter this will be diagnosed during the
parsing of template definition.

Differential Revision: https://reviews.llvm.org/D62584

llvm-svn: 366417
---
 clang/lib/Sema/SemaDecl.cpp                       |  5 ++++-
 clang/lib/Sema/SemaType.cpp                       |  4 +++-
 clang/lib/Sema/TreeTransform.h                    |  7 -------
 .../test/SemaOpenCLCXX/address-space-deduction.cl | 15 +++++++++++++++
 .../test/SemaOpenCLCXX/address-space-templates.cl |  2 +-
 5 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index cc91ec5946844..72b4f6bbcdaf2 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -7491,7 +7491,10 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
             return;
           }
         }
-      } else if (T.getAddressSpace() != LangAS::opencl_private) {
+      } else if (T.getAddressSpace() != LangAS::opencl_private &&
+                 // If we are parsing a template we didn't deduce an addr
+                 // space yet.
+                 T.getAddressSpace() != LangAS::Default) {
         // Do not allow other address spaces on automatic variable.
         Diag(NewVD->getLocation(), diag::err_as_qualified_auto_decl) << 1;
         NewVD->setInvalidDecl();
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index bb71db7609f55..29acf6177eb9c 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -7419,7 +7419,9 @@ static void deduceOpenCLImplicitAddrSpace(TypeProcessingState &State,
       (T->isVoidType() && !IsPointee) ||
       // Do not deduce addr spaces for dependent types because they might end
       // up instantiating to a type with an explicit address space qualifier.
-      T->isDependentType() ||
+      // Except for pointer or reference types because the addr space in
+      // template argument can only belong to a pointee.
+      (T->isDependentType() && !T->isPointerType() && !T->isReferenceType()) ||
       // Do not deduce addr space of decltype because it will be taken from
       // its argument.
       T->isDecltypeType() ||
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 3b841ec649a8f..8df18b5c27844 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -5392,13 +5392,6 @@ QualType TreeTransform<Derived>::TransformFunctionProtoType(
     if (ResultType.isNull())
       return QualType();
 
-    // Return type can not be qualified with an address space.
-    if (ResultType.getAddressSpace() != LangAS::Default) {
-      SemaRef.Diag(TL.getReturnLoc().getBeginLoc(),
-                   diag::err_attribute_address_function_type);
-      return QualType();
-    }
-
     if (getDerived().TransformFunctionTypeParams(
             TL.getBeginLoc(), TL.getParams(),
             TL.getTypePtr()->param_type_begin(),
diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
index 08668951dbca6..4283ec41e6356 100644
--- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl
+++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
@@ -63,3 +63,18 @@ public:
 //CHECK: -CXXConstructorDecl {{.*}} x3<T> 'void (const x3<T> &){{( __attribute__.*)?}} __generic'
 template <typename T>
 x3<T>::x3(const x3<T> &t) {}
+
+template <class T>
+T xxx(T *in) {
+  // This pointer can't be deduced to generic because addr space
+  // will be taken from the template argument.
+  //CHECK: `-VarDecl {{.*}} i 'T *' cinit
+  T *i = in;
+  T ii;
+  return *i;
+}
+
+__kernel void test() {
+  int foo[10];
+  xxx(&foo[0]);
+}
diff --git a/clang/test/SemaOpenCLCXX/address-space-templates.cl b/clang/test/SemaOpenCLCXX/address-space-templates.cl
index 48fbdc7642d6f..3fb935766e941 100644
--- a/clang/test/SemaOpenCLCXX/address-space-templates.cl
+++ b/clang/test/SemaOpenCLCXX/address-space-templates.cl
@@ -3,7 +3,7 @@
 template <typename T>
 struct S {
   T a;        // expected-error{{field may not be qualified with an address space}}
-  T f1();     // expected-error{{function type may not be qualified with an address space}}
+  T f1();     // we ignore address space on a return types.
   void f2(T); // expected-error{{parameter may not be qualified with an address space}}
 };
 

From 0b03429a9111562965fe7d467a8bac7ef8309900 Mon Sep 17 00:00:00 2001
From: Chris Jackson <snortotter@gmail.com>
Date: Thu, 18 Jul 2019 09:17:11 +0000
Subject: [PATCH 434/451] [lld] Fix vs-diagnostics-version-script test. NFC.

Removed unnecessary llvm-mc call.

llvm-svn: 366418
---
 lld/test/ELF/vs-diagnostics-versionscript.s | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lld/test/ELF/vs-diagnostics-versionscript.s b/lld/test/ELF/vs-diagnostics-versionscript.s
index 2f12ae659b1bd..12901570dc576 100644
--- a/lld/test/ELF/vs-diagnostics-versionscript.s
+++ b/lld/test/ELF/vs-diagnostics-versionscript.s
@@ -1,8 +1,5 @@
-# REQUIRES: x86
-# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
-# RUN: rm -f %/terr1.script
-# RUN: echo  "\"" > %/terr1.script
-# RUN: not ld.lld --vs-diagnostics  --version-script %/terr1.script -shared %/t.o -o %/t.so 2>&1 | \
+# RUN: echo "\"" > %/terr1.script
+# RUN: not ld.lld --vs-diagnostics --version-script %/terr1.script 2>&1 | \
 # RUN: FileCheck %s -DSCRIPT="%/terr1.script"
 
 # CHECK: [[SCRIPT]](1): error: [[SCRIPT]]:1: unclosed quote

From c38e3efe27aa6be05a3689974f6a8c9d28c20ee2 Mon Sep 17 00:00:00 2001
From: Chen Zheng <czhengsz@cn.ibm.com>
Date: Thu, 18 Jul 2019 09:23:19 +0000
Subject: [PATCH 435/451] [SCEV] add no wrap flag for SCEVAddExpr. Differential
 Revision: https://reviews.llvm.org/D64868

llvm-svn: 366419
---
 llvm/lib/Analysis/ScalarEvolution.cpp              | 2 +-
 llvm/test/Analysis/ScalarEvolution/limit-depth.ll  | 2 +-
 llvm/test/Analysis/ScalarEvolution/nsw.ll          | 2 +-
 llvm/test/Analysis/ScalarEvolution/trip-count12.ll | 2 +-
 llvm/test/Analysis/ScalarEvolution/trip-count9.ll  | 8 ++++----
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index d81886fca9da2..bc2cfd6fcc42c 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4991,7 +4991,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
   // overflow.
   if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
     if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
-      (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+      (void)getAddRecExpr(getAddExpr(StartVal, Accum, Flags), Accum, L, Flags);
 
   return PHISCEV;
 }
diff --git a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
index 6fdf8c5df9742..db68a4f84c912 100644
--- a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
+++ b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
@@ -46,7 +46,7 @@ define void @test_mul(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
 define void @test_sext(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
 ; CHECK-LABEL: @test_sext
 ; CHECK:        %se2 = sext i64 %iv2.inc to i128
-; CHECK-NEXT:   -->  {(1 + (sext i64 {(sext i32 (1 + %a) to i64),+,1}<nsw><%loop> to i128))<nsw>,+,1}<nsw><%loop2>
+; CHECK-NEXT:   -->  {(1 + (sext i64 {(sext i32 (1 + %a)<nsw> to i64),+,1}<nsw><%loop> to i128))<nsw>,+,1}<nsw><%loop2>
 entry:
   br label %loop
 
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll
index ca24f9d4a04bb..69427368625d4 100644
--- a/llvm/test/Analysis/ScalarEvolution/nsw.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll
@@ -163,7 +163,7 @@ bb5:                                              ; preds = %bb2
 declare void @f(i32)
 
 ; CHECK-LABEL: nswnowrap
-; CHECK: --> {(1 + %v)<nsw>,+,1}<nsw><%for.body>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (1 + ((1 + %v)<nsw> smax %v))
+; CHECK: --> {(1 + %v)<nsw>,+,1}<nsw><%for.body>{{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: (2 + %v)
 define void @nswnowrap(i32 %v, i32* %buf) {
 entry:
   %add = add nsw i32 %v, 1
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count12.ll b/llvm/test/Analysis/ScalarEvolution/trip-count12.ll
index d0086ee2e6acc..5e7d72d5e4f36 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count12.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count12.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
 
 ; CHECK: Determining loop execution counts for: @test
-; CHECK: Loop %for.body: backedge-taken count is ((-2 + %len) /u 2)
+; CHECK: Loop %for.body: backedge-taken count is ((-2 + %len)<nsw> /u 2)
 ; CHECK: Loop %for.body: max backedge-taken count is 1073741823
 
 define zeroext i16 @test(i16* nocapture %p, i32 %len) nounwind readonly {
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count9.ll b/llvm/test/Analysis/ScalarEvolution/trip-count9.ll
index 9a080b34743f9..c0a1d12fa00e8 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count9.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count9.ll
@@ -179,7 +179,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @nsw_startx
-; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax %n))
+; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x)<nsw> smax %n))
 ; CHECK: Loop %loop: max backedge-taken count is -1
 define void @nsw_startx(i4 %n, i4 %x) {
 entry:
@@ -195,7 +195,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @nsw_startx_step2
-; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax %n)) /u 2)
+; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x)<nsw> smax %n)) /u 2)
 ; CHECK: Loop %loop: max backedge-taken count is 7
 define void @nsw_startx_step2(i4 %n, i4 %x) {
 entry:
@@ -381,7 +381,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_nsw_startx
-; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax (2 * %n)))
+; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x)<nsw> smax (2 * %n)))
 ; CHECK: Loop %loop: max backedge-taken count is -2
 define void @even_nsw_startx(i4 %n, i4 %x) {
 entry:
@@ -398,7 +398,7 @@ exit:
 }
 
 ; CHECK: Determining loop execution counts for: @even_nsw_startx_step2
-; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax (2 * %n))) /u 2)
+; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x)<nsw> smax (2 * %n))) /u 2)
 ; CHECK: Loop %loop: max backedge-taken count is 7
 define void @even_nsw_startx_step2(i4 %n, i4 %x) {
 entry:

From 51cdd51807ad0b8b9507c12c679ff618a572c0fc Mon Sep 17 00:00:00 2001
From: Johan Vikstrom <jvikstrom@google.com>
Date: Thu, 18 Jul 2019 09:56:38 +0000
Subject: [PATCH 436/451] [clangd] Added highlightings for template parameters
 and specializations.

Summary: Template parameters and specializations were not being highlighted before. This adds highlightings to those types of tokens by adding two Visit* methods.

Reviewers: hokein, sammccall, ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64855

llvm-svn: 366420
---
 .../clangd/SemanticHighlighting.cpp           | 27 +++++++++++++
 .../clangd/SemanticHighlighting.h             |  1 +
 .../clangd/test/semantic-highlighting.test    |  3 ++
 .../unittests/SemanticHighlightingTests.cpp   | 39 ++++++++++++++++---
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index b6b1c8d8a2cf8..59fa5319d700d 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -99,6 +99,19 @@ class HighlightingTokenCollector
     return true;
   }
 
+  bool VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc &TL) {
+    // TemplateTypeParmTypeLoc does not have a TagDecl in its type ptr.
+    addToken(TL.getBeginLoc(), TL.getDecl());
+    return true;
+  }
+
+  bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc &TL) {
+    if (const TemplateDecl *TD =
+            TL.getTypePtr()->getTemplateName().getAsTemplateDecl())
+      addToken(TL.getBeginLoc(), TD);
+    return true;
+  }
+
   bool VisitTypeLoc(TypeLoc &TL) {
     // This check is for not getting two entries when there are anonymous
     // structs. It also makes us not highlight certain namespace qualifiers
@@ -135,6 +148,10 @@ class HighlightingTokenCollector
     // We highlight class decls, constructor decls and destructor decls as
     // `Class` type. The destructor decls are handled in `VisitTypeLoc` (we will
     // visit a TypeLoc where the underlying Type is a CXXRecordDecl).
+    if (isa<ClassTemplateDecl>(D)) {
+      addToken(Loc, HighlightingKind::Class);
+      return;
+    }
     if (isa<RecordDecl>(D)) {
       addToken(Loc, HighlightingKind::Class);
       return;
@@ -175,6 +192,14 @@ class HighlightingTokenCollector
       addToken(Loc, HighlightingKind::Namespace);
       return;
     }
+    if (isa<TemplateTemplateParmDecl>(D)) {
+      addToken(Loc, HighlightingKind::TemplateParameter);
+      return;
+    }
+    if (isa<TemplateTypeParmDecl>(D)) {
+      addToken(Loc, HighlightingKind::TemplateParameter);
+      return;
+    }
   }
 
   void addToken(SourceLocation Loc, HighlightingKind Kind) {
@@ -297,6 +322,8 @@ llvm::StringRef toTextMateScope(HighlightingKind Kind) {
     return "variable.other.enummember.cpp";
   case HighlightingKind::Namespace:
     return "entity.name.namespace.cpp";
+  case HighlightingKind::TemplateParameter:
+    return "entity.name.type.template.cpp";
   case HighlightingKind::NumKinds:
     llvm_unreachable("must not pass NumKinds to the function");
   }
diff --git a/clang-tools-extra/clangd/SemanticHighlighting.h b/clang-tools-extra/clangd/SemanticHighlighting.h
index eaeeb861f9a29..f38fa46dec363 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.h
+++ b/clang-tools-extra/clangd/SemanticHighlighting.h
@@ -32,6 +32,7 @@ enum class HighlightingKind {
   Enum,
   EnumConstant,
   Namespace,
+  TemplateParameter,
 
   NumKinds,
 };
diff --git a/clang-tools-extra/clangd/test/semantic-highlighting.test b/clang-tools-extra/clangd/test/semantic-highlighting.test
index 7de25d1713dc7..732fb7b7f30b5 100644
--- a/clang-tools-extra/clangd/test/semantic-highlighting.test
+++ b/clang-tools-extra/clangd/test/semantic-highlighting.test
@@ -27,6 +27,9 @@
 # CHECK-NEXT:          ],
 # CHECK-NEXT:          [
 # CHECK-NEXT:            "entity.name.namespace.cpp"
+# CHECK-NEXT:          ],
+# CHECK-NEXT:          [
+# CHECK-NEXT:            "entity.name.type.template.cpp"
 # CHECK-NEXT:          ]
 # CHECK-NEXT:        ]
 # CHECK-NEXT:      },
diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
index f69e336253ca7..2943780b8bdf1 100644
--- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -40,7 +40,8 @@ void checkHighlightings(llvm::StringRef Code) {
       {HighlightingKind::Namespace, "Namespace"},
       {HighlightingKind::EnumConstant, "EnumConstant"},
       {HighlightingKind::Field, "Field"},
-      {HighlightingKind::Method, "Method"}};
+      {HighlightingKind::Method, "Method"},
+      {HighlightingKind::TemplateParameter, "TemplateParameter"}};
   std::vector<HighlightingToken> ExpectedTokens;
   for (const auto &KindString : KindToString) {
     std::vector<HighlightingToken> Toks = makeHighlightingTokens(
@@ -80,14 +81,14 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
     )cpp",
     R"cpp(
       namespace $Namespace[[abc]] {
-        template<typename T>
+        template<typename $TemplateParameter[[T]]>
         struct $Class[[A]] {
-          T $Field[[t]];
+          $TemplateParameter[[T]] $Field[[t]];
         };
       }
-      template<typename T>
-      struct $Class[[C]] : $Namespace[[abc]]::A<T> {
-        typename T::A* $Field[[D]];
+      template<typename $TemplateParameter[[T]]>
+      struct $Class[[C]] : $Namespace[[abc]]::$Class[[A]]<$TemplateParameter[[T]]> {
+        typename $TemplateParameter[[T]]::A* $Field[[D]];
       };
       $Namespace[[abc]]::$Class[[A]]<int> $Variable[[AA]];
       typedef $Namespace[[abc]]::$Class[[A]]<int> $Class[[AAA]];
@@ -186,6 +187,32 @@ TEST(SemanticHighlighting, GetsCorrectTokens) {
       using $Enum[[CD]] = $Enum[[CC]];
       $Enum[[CC]] $Function[[f]]($Class[[B]]);
       $Enum[[CD]] $Function[[f]]($Class[[BB]]);
+    )cpp",
+    R"cpp(
+      template<typename $TemplateParameter[[T]], typename = void>
+      class $Class[[A]] {
+        $TemplateParameter[[T]] $Field[[AA]];
+        $TemplateParameter[[T]] $Method[[foo]]();
+      };
+      template<class $TemplateParameter[[TT]]>
+      class $Class[[B]] {
+        $Class[[A]]<$TemplateParameter[[TT]]> $Field[[AA]];
+      };
+      template<class $TemplateParameter[[TT]], class $TemplateParameter[[GG]]>
+      class $Class[[BB]] {};
+      template<class $TemplateParameter[[T]]>
+      class $Class[[BB]]<$TemplateParameter[[T]], int> {};
+      template<class $TemplateParameter[[T]]>
+      class $Class[[BB]]<$TemplateParameter[[T]], $TemplateParameter[[T]]*> {};
+
+      template<template<class> class $TemplateParameter[[T]], class $TemplateParameter[[C]]>
+      $TemplateParameter[[T]]<$TemplateParameter[[C]]> $Function[[f]]();
+
+      template<typename>
+      class $Class[[Foo]] {};
+
+      template<typename $TemplateParameter[[T]]>
+      void $Function[[foo]]($TemplateParameter[[T]] ...);
     )cpp"};
   for (const auto &TestCase : TestCases) {
     checkHighlightings(TestCase);

From 46b55fa58dead05222211e327ed90ffab949040b Mon Sep 17 00:00:00 2001
From: Anastasia Stulova <anastasia.stulova@arm.com>
Date: Thu, 18 Jul 2019 10:02:35 +0000
Subject: [PATCH 437/451] [OpenCL] Update comments/diagnostics to refer to C++
 for OpenCL

Clang doesn't implement OpenCL C++; change the comments and
diagnostics to reflect that.

Differential Revision: https://reviews.llvm.org/D64867

llvm-svn: 366421
---
 .../clang/Basic/DiagnosticCommonKinds.td       |  8 ++++----
 .../clang/Basic/DiagnosticParseKinds.td        |  4 ++--
 clang/include/clang/Basic/LangOptions.def      |  4 ++--
 clang/include/clang/Basic/TokenKinds.def       |  5 ++---
 clang/include/clang/Frontend/LangStandards.def |  2 +-
 clang/lib/Frontend/InitPreprocessor.cpp        |  2 +-
 clang/lib/Parse/ParseDecl.cpp                  |  3 ++-
 clang/lib/Sema/DeclSpec.cpp                    |  1 -
 clang/lib/Sema/SemaCast.cpp                    |  2 +-
 clang/lib/Sema/SemaDecl.cpp                    | 11 +++++------
 clang/lib/Sema/SemaDeclCXX.cpp                 |  4 ++--
 clang/lib/Sema/SemaExprCXX.cpp                 |  6 +++---
 clang/test/Driver/unknown-std.cl               |  2 +-
 clang/test/Parser/opencl-cxx-keywords.cl       |  2 +-
 clang/test/Parser/opencl-cxx-virtual.cl        |  6 +++---
 clang/test/SemaOpenCLCXX/newdelete.cl          |  8 ++++----
 clang/test/SemaOpenCLCXX/restricted.cl         | 18 +++++++++---------
 17 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 776d16bd544b6..ca2faf59d70f9 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -132,8 +132,8 @@ def err_nullability_conflicting : Error<
 
 // OpenCL Section 6.8.g
 def err_opencl_unknown_type_specifier : Error<
-  "OpenCL %select{C|C++}0 version %1 does not support the '%2' "
-  "%select{type qualifier|storage class specifier}3">;
+  "%select{OpenCL C|C++ for OpenCL}0 version %1 does not support the "
+  "'%2' %select{type qualifier|storage class specifier}3">;
 
 def warn_unknown_attribute_ignored : Warning<
   "unknown attribute %0 ignored">, InGroup<UnknownAttributes>;
@@ -291,9 +291,9 @@ def note_mt_message : Note<"[rewriter] %0">;
 def warn_arcmt_nsalloc_realloc : Warning<"[rewriter] call returns pointer to GC managed memory; it will become unmanaged in ARC">;
 def err_arcmt_nsinvocation_ownership : Error<"NSInvocation's %0 is not safe to be used with an object with ownership other than __unsafe_unretained">;
 
-// OpenCL C++.
+// C++ for OpenCL.
 def err_openclcxx_not_supported : Error<
-  "'%0' is not supported in OpenCL C++">;
+  "'%0' is not supported in C++ for OpenCL">;
 
 // OpenMP
 def err_omp_more_one_clause : Error<
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 608337ed316e8..8e6ced0dea54f 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1154,9 +1154,9 @@ def err_opencl_taking_function_address_parser : Error<
 def err_opencl_logical_exclusive_or : Error<
   "^^ is a reserved operator in OpenCL">;
 
-// OpenCL C++.
+// C++ for OpenCL.
 def err_openclcxx_virtual_function : Error<
-  "virtual functions are not supported in OpenCL C++">;
+  "virtual functions are not supported in C++ for OpenCL">;
 
 // OpenMP support.
 def warn_pragma_omp_ignored : Warning<
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index bbe3f7b77dbc6..31aca2b0d6950 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -197,8 +197,8 @@ LANGOPT(ShortEnums        , 1, 0, "short enum types")
 
 LANGOPT(OpenCL            , 1, 0, "OpenCL")
 LANGOPT(OpenCLVersion     , 32, 0, "OpenCL C version")
-LANGOPT(OpenCLCPlusPlus   , 1, 0, "OpenCL C++")
-LANGOPT(OpenCLCPlusPlusVersion     , 32, 0, "OpenCL C++ version")
+LANGOPT(OpenCLCPlusPlus   , 1, 0, "C++ for OpenCL")
+LANGOPT(OpenCLCPlusPlusVersion     , 32, 0, "C++ for OpenCL version")
 LANGOPT(NativeHalfType    , 1, 0, "Native half type support")
 LANGOPT(NativeHalfArgsAndReturns, 1, 0, "Native half args and returns")
 LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 764f586f1439b..55e94d387c9d4 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -252,9 +252,8 @@ PUNCTUATOR(caretcaret,            "^^")
 //   KEYNOMS18 - This is a keyword that must never be enabled under
 //               MSVC <= v18.
 //   KEYOPENCLC   - This is a keyword in OpenCL C
-//   KEYOPENCLCXX - This is a keyword in OpenCL C++
-//   KEYNOOPENCL  - This is a keyword that is not supported in OpenCL C
-//                  nor in OpenCL C++.
+//   KEYOPENCLCXX - This is a keyword in C++ for OpenCL
+//   KEYNOOPENCL  - This is a keyword that is not supported in OpenCL
 //   KEYALTIVEC - This is a keyword in AltiVec
 //   KEYZVECTOR - This is a keyword for the System z vector extensions,
 //                which are heavily based on AltiVec
diff --git a/clang/include/clang/Frontend/LangStandards.def b/clang/include/clang/Frontend/LangStandards.def
index 44a080d6d12fc..0964e9b90a038 100644
--- a/clang/include/clang/Frontend/LangStandards.def
+++ b/clang/include/clang/Frontend/LangStandards.def
@@ -166,7 +166,7 @@ LANGSTANDARD(opencl20, "cl2.0",
              OpenCL, "OpenCL 2.0",
              LineComment | C99 | Digraphs | HexFloat | OpenCL)
 LANGSTANDARD(openclcpp, "c++",
-             OpenCL, "OpenCL C++ 1.0",
+             OpenCL, "C++ for OpenCL",
              LineComment | CPlusPlus | CPlusPlus11 | CPlusPlus14 | CPlusPlus17 |
              Digraphs | HexFloat | OpenCL)
 
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 11ebab9454871..3906e2ae1b985 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -411,7 +411,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
       if (LangOpts.OpenCLCPlusPlusVersion == 100)
         Builder.defineMacro("__OPENCL_CPP_VERSION__", "100");
       else
-        llvm_unreachable("Unsupported OpenCL C++ version");
+        llvm_unreachable("Unsupported C++ version for OpenCL");
       Builder.defineMacro("__CL_CPP_VERSION_1_0__", "100");
     } else {
       // OpenCL v1.0 and v1.1 do not have a predefined macro to indicate the
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 97fd92bec7875..73b4f50fda460 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -3559,7 +3559,8 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS,
       isInvalid = DS.setFunctionSpecInline(Loc, PrevSpec, DiagID);
       break;
     case tok::kw_virtual:
-      // OpenCL C++ v1.0 s2.9: the virtual function qualifier is not supported.
+      // C++ for OpenCL does not allow virtual function qualifier, to avoid
+      // function pointers restricted in OpenCL v2.0 s6.9.a.
       if (getLangOpts().OpenCLCPlusPlus) {
         DiagID = diag::err_openclcxx_virtual_function;
         PrevSpec = Tok.getIdentifierInfo()->getNameStart();
diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp
index 9433efb181945..77e5eb0956930 100644
--- a/clang/lib/Sema/DeclSpec.cpp
+++ b/clang/lib/Sema/DeclSpec.cpp
@@ -596,7 +596,6 @@ bool DeclSpec::SetStorageClassSpec(Sema &S, SCS SC, SourceLocation Loc,
   // these storage-class specifiers.
   // OpenCL v1.2 s6.8 changes this to "The auto and register storage-class
   // specifiers are not supported."
-  // OpenCL C++ v1.0 s2.9 restricts register.
   if (S.getLangOpts().OpenCL &&
       !S.getOpenCLOptions().isEnabled("cl_clang_storage_class_specifiers")) {
     switch (SC) {
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
index 85d6da700eaad..f184eda2f2738 100644
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -285,7 +285,7 @@ Sema::BuildCXXNamedCast(SourceLocation OpLoc, tok::TokenKind Kind,
                                                 AngleBrackets));
 
   case tok::kw_dynamic_cast: {
-    // OpenCL C++ 1.0 s2.9: dynamic_cast is not supported.
+    // dynamic_cast is not supported in C++ for OpenCL.
     if (getLangOpts().OpenCLCPlusPlus) {
       return ExprError(Diag(OpLoc, diag::err_openclcxx_not_supported)
                        << "dynamic_cast");
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 72b4f6bbcdaf2..a6c52b7d4b2b8 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -6428,8 +6428,8 @@ NamedDecl *Sema::ActOnVariableDeclarator(
       }
     }
 
-    // OpenCL C++ 1.0 s2.9: the thread_local storage qualifier is not
-    // supported.  OpenCL C does not support thread_local either, and
+    // C++ for OpenCL does not allow the thread_local storage qualifier.
+    // OpenCL C does not support thread_local either, and
     // also reject all other thread storage class specifiers.
     DeclSpec::TSCS TSC = D.getDeclSpec().getThreadStorageClassSpec();
     if (TSC != TSCS_unspecified) {
@@ -7435,9 +7435,8 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
     // OpenCL C v2.0 s6.5.1 - Variables defined at program scope and static
     // variables inside a function can also be declared in the global
     // address space.
-    // OpenCL C++ v1.0 s2.5 inherits rule from OpenCL C v2.0 and allows local
-    // address space additionally.
-    // FIXME: Add local AS for OpenCL C++.
+    // C++ for OpenCL inherits rule from OpenCL C v2.0.
+    // FIXME: Adding local AS in C++ for OpenCL might make sense.
     if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() ||
         NewVD->hasExternalStorage()) {
       if (!T->isSamplerT() &&
@@ -11590,7 +11589,7 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
       // do nothing
 
     // OpenCL v1.2 s6.5.3: __constant locals must be constant-initialized.
-    // This is true even in OpenCL C++.
+    // This is true even in C++ for OpenCL.
     } else if (VDecl->getType().getAddressSpace() == LangAS::opencl_constant) {
       CheckForConstantInitializer(Init, DclT);
 
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index cb6b4188039f2..dd77fc55721fb 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -13272,7 +13272,7 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl,
                         diag::err_operator_new_delete_dependent_result_type)
     << FnDecl->getDeclName() << ExpectedResultType;
 
-  // OpenCL C++: the operator is valid on any address space.
+  // The operator is valid on any address space for OpenCL.
   if (SemaRef.getLangOpts().OpenCLCPlusPlus) {
     if (auto *PtrTy = ResultType->getAs<PointerType>()) {
       ResultType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy);
@@ -13305,7 +13305,7 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl,
 
   // Check that the first parameter type is what we expect.
   if (SemaRef.getLangOpts().OpenCLCPlusPlus) {
-    // OpenCL C++: the operator is valid on any address space.
+    // The operator is valid on any address space for OpenCL.
     if (auto *PtrTy =
             FnDecl->getParamDecl(0)->getType()->getAs<PointerType>()) {
       FirstParamType = RemoveAddressSpaceFromPtr(SemaRef, PtrTy);
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index cdca2e8cac6b7..705e3b9bd7fb9 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -529,7 +529,7 @@ ExprResult Sema::BuildCXXTypeId(QualType TypeInfoType,
 ExprResult
 Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc,
                      bool isType, void *TyOrExpr, SourceLocation RParenLoc) {
-  // OpenCL C++ 1.0 s2.9: typeid is not supported.
+  // typeid is not supported in OpenCL.
   if (getLangOpts().OpenCLCPlusPlus) {
     return ExprError(Diag(OpLoc, diag::err_openclcxx_not_supported)
                      << "typeid");
@@ -2656,8 +2656,8 @@ void Sema::DeclareGlobalNewDelete() {
   if (GlobalNewDeleteDeclared)
     return;
 
-  // OpenCL C++ 1.0 s2.9: the implicitly declared new and delete operators
-  // are not supported.
+  // The implicitly declared new and delete operators
+  // are not supported in OpenCL.
   if (getLangOpts().OpenCLCPlusPlus)
     return;
 
diff --git a/clang/test/Driver/unknown-std.cl b/clang/test/Driver/unknown-std.cl
index 90ee97b77f9cd..285582ee0af6a 100644
--- a/clang/test/Driver/unknown-std.cl
+++ b/clang/test/Driver/unknown-std.cl
@@ -10,7 +10,7 @@
 // CHECK-NEXT: note: use 'cl1.1' for 'OpenCL 1.1' standard
 // CHECK-NEXT: note: use 'cl1.2' for 'OpenCL 1.2' standard
 // CHECK-NEXT: note: use 'cl2.0' for 'OpenCL 2.0' standard
-// CHECK-NEXT: note: use 'c++' for 'OpenCL C++ 1.0' standard
+// CHECK-NEXT: note: use 'c++' for 'C++ for OpenCL' standard
 
 // Make sure that no other output is present.
 // CHECK-NOT: {{^.+$}}
diff --git a/clang/test/Parser/opencl-cxx-keywords.cl b/clang/test/Parser/opencl-cxx-keywords.cl
index beae6f4b0895d..0cafcdf28f868 100644
--- a/clang/test/Parser/opencl-cxx-keywords.cl
+++ b/clang/test/Parser/opencl-cxx-keywords.cl
@@ -2,7 +2,7 @@
 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -verify -fsyntax-only -fexceptions -fcxx-exceptions
 
 // This test checks that various C++ and OpenCL C keywords are not available
-// in OpenCL C++, according to OpenCL C++ 1.0 Specification Section 2.9.
+// in OpenCL.
 
 // Test that exceptions are disabled despite passing -fcxx-exceptions.
 kernel void test_exceptions() {
diff --git a/clang/test/Parser/opencl-cxx-virtual.cl b/clang/test/Parser/opencl-cxx-virtual.cl
index da08bfde957ba..53befbc32120b 100644
--- a/clang/test/Parser/opencl-cxx-virtual.cl
+++ b/clang/test/Parser/opencl-cxx-virtual.cl
@@ -3,17 +3,17 @@
 // Test that virtual functions and abstract classes are rejected.
 class virtual_functions {
   virtual void bad1() {}
-  //expected-error@-1 {{virtual functions are not supported in OpenCL C++}}
+  //expected-error@-1 {{virtual functions are not supported in C++ for OpenCL}}
 
   virtual void bad2() = 0;
-  //expected-error@-1 {{virtual functions are not supported in OpenCL C++}}
+  //expected-error@-1 {{virtual functions are not supported in C++ for OpenCL}}
   //expected-error@-2 {{'bad2' is not virtual and cannot be declared pure}}
 };
 
 template <typename T>
 class X {
   virtual T f();
-  //expected-error@-1 {{virtual functions are not supported in OpenCL C++}}
+  //expected-error@-1 {{virtual functions are not supported in C++ for OpenCL}}
 };
 
 // Test that virtual base classes are allowed.
diff --git a/clang/test/SemaOpenCLCXX/newdelete.cl b/clang/test/SemaOpenCLCXX/newdelete.cl
index 14be4550c0fc6..abc4c0fb6cbd6 100644
--- a/clang/test/SemaOpenCLCXX/newdelete.cl
+++ b/clang/test/SemaOpenCLCXX/newdelete.cl
@@ -19,8 +19,8 @@ class B {
 // There are no global user-defined new operators at this point. Test that clang
 // rejects these gracefully.
 void test_default_new_delete(void *buffer, A **pa) {
-  A *a = new A;         // expected-error {{'default new' is not supported in OpenCL C++}}
-  delete a;             // expected-error {{'default delete' is not supported in OpenCL C++}}
+  A *a = new A;         // expected-error {{'default new' is not supported in C++ for OpenCL}}
+  delete a;             // expected-error {{'default delete' is not supported in C++ for OpenCL}}
   *pa = new (buffer) A; // expected-error {{use of placement new requires explicit declaration}}
 }
 
@@ -36,10 +36,10 @@ void *operator new[](size_t _s, void *ptr) noexcept {
 
 void test_new_delete(void *buffer, A **a, B **b) {
   *a = new A; // expected-error {{no matching function for call to 'operator new'}}
-  delete a;   // expected-error {{'default delete' is not supported in OpenCL C++}}
+  delete a;   // expected-error {{'default delete' is not supported in C++ for OpenCL}}
 
   *a = new A[20]; // expected-error {{no matching function for call to 'operator new[]'}}
-  delete[] *a;    // expected-error {{'default delete' is not supported in OpenCL C++}}
+  delete[] *a;    // expected-error {{'default delete' is not supported in C++ for OpenCL}}
 
   // User-defined placement new is supported.
   *a = new (buffer) A;
diff --git a/clang/test/SemaOpenCLCXX/restricted.cl b/clang/test/SemaOpenCLCXX/restricted.cl
index f4ad27ad29d93..2af4ae137c41d 100644
--- a/clang/test/SemaOpenCLCXX/restricted.cl
+++ b/clang/test/SemaOpenCLCXX/restricted.cl
@@ -1,9 +1,9 @@
 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -pedantic -verify -fsyntax-only
 
 // This test checks that various C/C++/OpenCL C constructs are not available in
-// OpenCL C++, according to OpenCL C++ 1.0 Specification Section 2.9.
+// C++ for OpenCL.
 
-// Test that typeid is not available in OpenCL C++.
+// Test that typeid is not available.
 namespace std {
   // Provide a dummy std::type_info so that we can use typeid.
   class type_info {
@@ -11,9 +11,9 @@ namespace std {
   };
 }
 __constant std::type_info int_ti = typeid(int);
-// expected-error@-1 {{'typeid' is not supported in OpenCL C++}}
+// expected-error@-1 {{'typeid' is not supported in C++ for OpenCL}}
 
-// Test that dynamic_cast is not available in OpenCL C++.
+// Test that dynamic_cast is not available in C++ for OpenCL.
 class A {
 public:
   int a;
@@ -25,17 +25,17 @@ class B : public A {
 
 B *test_dynamic_cast(B *p) {
   return dynamic_cast<B *>(p);
-  // expected-error@-1 {{'dynamic_cast' is not supported in OpenCL C++}}
+  // expected-error@-1 {{'dynamic_cast' is not supported in C++ for OpenCL}}
 }
 
 // Test storage class qualifiers.
 __constant _Thread_local int a = 1;
-// expected-error@-1 {{OpenCL C++ version 1.0 does not support the '_Thread_local' storage class specifier}}
+// expected-error@-1 {{C++ for OpenCL version 1.0 does not support the '_Thread_local' storage class specifier}}
 __constant __thread int b = 2;
-// expected-error@-1 {{OpenCL C++ version 1.0 does not support the '__thread' storage class specifier}}
+// expected-error@-1 {{C++ for OpenCL version 1.0 does not support the '__thread' storage class specifier}}
 kernel void test_storage_classes() {
   register int x;
-  // expected-error@-1 {{OpenCL C++ version 1.0 does not support the 'register' storage class specifier}}
+  // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the 'register' storage class specifier}}
   thread_local int y;
-  // expected-error@-1 {{OpenCL C++ version 1.0 does not support the 'thread_local' storage class specifier}}
+  // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the 'thread_local' storage class specifier}}
 }

From 83748cc5abc199a5219b0e7d9ba308984a8df613 Mon Sep 17 00:00:00 2001
From: Marco Antognini <marco.antognini@arm.com>
Date: Thu, 18 Jul 2019 10:04:18 +0000
Subject: [PATCH 438/451] [OpenCL] Improve destructor support in C++ for OpenCL

Summary:
This patch mainly does three things:
 1. It fixes a false-positive error in Sema that is similar to
    D62156. The error happens when explicitly calling a destructor
    that is overloaded for different address spaces.
 2. It selects the correct destructor when multiple overloads for
    address spaces are available.
 3. It inserts the expected address space cast when invoking a
    destructor, if needed, and therefore fixes a crash due to the unmet
    assertion in llvm::CastInst::Create.

The following is a reproducer of the three issues:

    struct MyType {
      ~MyType() {}
      ~MyType() __constant {}
    };

    __constant MyType myGlobal{};

    kernel void foo() {
      myGlobal.~MyType(); // 1 and 2.
      // 1. error: cannot initialize object parameter of type
      //    '__generic MyType' with an expression of type '__constant MyType'
      // 2. error: no matching member function for call to '~MyType'
    }

    kernel void bar() {
      // 3. The implicit call to the destructor crashes due to:
      //    Assertion `castIsValid(op, S, Ty) && "Invalid cast!"' failed.
      //    in llvm::CastInst::Create.
      MyType myLocal;
    }

The added test depends on D62413 and covers a few more things than the
above reproducer.

Subscribers: yaxunl, Anastasia, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D64569

llvm-svn: 366422
---
 clang/include/clang/AST/DeclCXX.h             | 14 ++++-
 clang/lib/AST/DeclCXX.cpp                     | 25 +++++++-
 clang/lib/CodeGen/CGCXXABI.h                  | 14 +++--
 clang/lib/CodeGen/CGCall.cpp                  |  2 +-
 clang/lib/CodeGen/CGClass.cpp                 | 40 ++++++++-----
 clang/lib/CodeGen/CGDecl.cpp                  | 21 ++++---
 clang/lib/CodeGen/CGExprCXX.cpp               | 31 +++++++---
 clang/lib/CodeGen/CodeGenFunction.h           | 13 ++--
 clang/lib/CodeGen/ItaniumCXXABI.cpp           | 31 ++++++----
 clang/lib/CodeGen/MicrosoftCXXABI.cpp         | 29 +++++----
 clang/lib/Sema/SemaDeclCXX.cpp                | 50 ++++++++--------
 clang/lib/Sema/SemaOverload.cpp               |  4 +-
 clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl | 14 -----
 .../CodeGenOpenCLCXX/addrspace-with-class.cl  | 59 +++++++++++++++++++
 14 files changed, 234 insertions(+), 113 deletions(-)
 delete mode 100644 clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl
 create mode 100644 clang/test/CodeGenOpenCLCXX/addrspace-with-class.cl

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index cbf4f1397eb1f..7add83f896244 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -2232,20 +2232,20 @@ class CXXMethodDecl : public FunctionDecl {
 
   overridden_method_range overridden_methods() const;
 
-  /// Returns the parent of this method declaration, which
+  /// Return the parent of this method declaration, which
   /// is the class in which this method is defined.
   const CXXRecordDecl *getParent() const {
     return cast<CXXRecordDecl>(FunctionDecl::getParent());
   }
 
-  /// Returns the parent of this method declaration, which
+  /// Return the parent of this method declaration, which
   /// is the class in which this method is defined.
   CXXRecordDecl *getParent() {
     return const_cast<CXXRecordDecl *>(
              cast<CXXRecordDecl>(FunctionDecl::getParent()));
   }
 
-  /// Returns the type of the \c this pointer.
+  /// Return the type of the \c this pointer.
   ///
   /// Should only be called for instance (i.e., non-static) methods. Note
   /// that for the call operator of a lambda closure type, this returns the
@@ -2253,9 +2253,17 @@ class CXXMethodDecl : public FunctionDecl {
   /// 'this' type.
   QualType getThisType() const;
 
+  /// Return the type of the object pointed by \c this.
+  ///
+  /// See getThisType() for usage restriction.
+  QualType getThisObjectType() const;
+
   static QualType getThisType(const FunctionProtoType *FPT,
                               const CXXRecordDecl *Decl);
 
+  static QualType getThisObjectType(const FunctionProtoType *FPT,
+                                    const CXXRecordDecl *Decl);
+
   Qualifiers getMethodQualifiers() const {
     return getType()->getAs<FunctionProtoType>()->getMethodQuals();
   }
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 857ac19e6b14c..59710a55498f2 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -2253,12 +2253,23 @@ CXXMethodDecl::overridden_methods() const {
   return getASTContext().overridden_methods(this);
 }
 
+static QualType getThisObjectType(ASTContext &C, const FunctionProtoType *FPT,
+                                  const CXXRecordDecl *Decl) {
+  QualType ClassTy = C.getTypeDeclType(Decl);
+  return C.getQualifiedType(ClassTy, FPT->getMethodQuals());
+}
+
 QualType CXXMethodDecl::getThisType(const FunctionProtoType *FPT,
                                     const CXXRecordDecl *Decl) {
   ASTContext &C = Decl->getASTContext();
-  QualType ClassTy = C.getTypeDeclType(Decl);
-  ClassTy = C.getQualifiedType(ClassTy, FPT->getMethodQuals());
-  return C.getPointerType(ClassTy);
+  QualType ObjectTy = ::getThisObjectType(C, FPT, Decl);
+  return C.getPointerType(ObjectTy);
+}
+
+QualType CXXMethodDecl::getThisObjectType(const FunctionProtoType *FPT,
+                                          const CXXRecordDecl *Decl) {
+  ASTContext &C = Decl->getASTContext();
+  return ::getThisObjectType(C, FPT, Decl);
 }
 
 QualType CXXMethodDecl::getThisType() const {
@@ -2273,6 +2284,14 @@ QualType CXXMethodDecl::getThisType() const {
                                     getParent());
 }
 
+QualType CXXMethodDecl::getThisObjectType() const {
+  // Ditto getThisType.
+  assert(isInstance() && "No 'this' for static methods!");
+
+  return CXXMethodDecl::getThisObjectType(getType()->getAs<FunctionProtoType>(),
+                                          getParent());
+}
+
 bool CXXMethodDecl::hasInlineBody() const {
   // If this function is a template instantiation, look at the template from
   // which it was instantiated.
diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h
index 511bcd00d4277..3a9c3b3474394 100644
--- a/clang/lib/CodeGen/CGCXXABI.h
+++ b/clang/lib/CodeGen/CGCXXABI.h
@@ -378,7 +378,7 @@ class CGCXXABI {
   virtual void EmitDestructorCall(CodeGenFunction &CGF,
                                   const CXXDestructorDecl *DD, CXXDtorType Type,
                                   bool ForVirtualBase, bool Delegating,
-                                  Address This) = 0;
+                                  Address This, QualType ThisTy) = 0;
 
   /// Emits the VTable definitions required for the given record type.
   virtual void emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -421,11 +421,15 @@ class CGCXXABI {
                                              llvm::Type *Ty,
                                              SourceLocation Loc) = 0;
 
+  using DeleteOrMemberCallExpr =
+      llvm::PointerUnion<const CXXDeleteExpr *, const CXXMemberCallExpr *>;
+
   /// Emit the ABI-specific virtual destructor call.
-  virtual llvm::Value *
-  EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor,
-                            CXXDtorType DtorType, Address This,
-                            const CXXMemberCallExpr *CE) = 0;
+  virtual llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
+                                                 const CXXDestructorDecl *Dtor,
+                                                 CXXDtorType DtorType,
+                                                 Address This,
+                                                 DeleteOrMemberCallExpr E) = 0;
 
   virtual void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF,
                                                 GlobalDecl GD,
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 5f1fb10074829..cf8024550eeec 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3502,7 +3502,7 @@ struct DestroyUnpassedArg final : EHScopeStack::Cleanup {
       const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
       assert(!Dtor->isTrivial());
       CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
-                                /*Delegating=*/false, Addr);
+                                /*Delegating=*/false, Addr, Ty);
     } else {
       CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty));
     }
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 9a9dd88810ed3..c8bb63c5c4b1f 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -491,12 +491,15 @@ namespace {
         cast<CXXMethodDecl>(CGF.CurCodeDecl)->getParent();
 
       const CXXDestructorDecl *D = BaseClass->getDestructor();
+      // We are already inside a destructor, so presumably the object being
+      // destroyed should have the expected type.
+      QualType ThisTy = D->getThisObjectType();
       Address Addr =
         CGF.GetAddressOfDirectBaseInCompleteClass(CGF.LoadCXXThisAddress(),
                                                   DerivedClass, BaseClass,
                                                   BaseIsVirtual);
       CGF.EmitCXXDestructorCall(D, Dtor_Base, BaseIsVirtual,
-                                /*Delegating=*/false, Addr);
+                                /*Delegating=*/false, Addr, ThisTy);
     }
   };
 
@@ -1440,9 +1443,11 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
   if (DtorType == Dtor_Deleting) {
     RunCleanupsScope DtorEpilogue(*this);
     EnterDtorCleanups(Dtor, Dtor_Deleting);
-    if (HaveInsertPoint())
+    if (HaveInsertPoint()) {
+      QualType ThisTy = Dtor->getThisObjectType();
       EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false,
-                            /*Delegating=*/false, LoadCXXThisAddress());
+                            /*Delegating=*/false, LoadCXXThisAddress(), ThisTy);
+    }
     return;
   }
 
@@ -1473,8 +1478,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
     EnterDtorCleanups(Dtor, Dtor_Complete);
 
     if (!isTryBody) {
+      QualType ThisTy = Dtor->getThisObjectType();
       EmitCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false,
-                            /*Delegating=*/false, LoadCXXThisAddress());
+                            /*Delegating=*/false, LoadCXXThisAddress(), ThisTy);
       break;
     }
 
@@ -2013,7 +2019,7 @@ void CodeGenFunction::destroyCXXObject(CodeGenFunction &CGF,
   const CXXDestructorDecl *dtor = record->getDestructor();
   assert(!dtor->isTrivial());
   CGF.EmitCXXDestructorCall(dtor, Dtor_Complete, /*for vbase*/ false,
-                            /*Delegating=*/false, addr);
+                            /*Delegating=*/false, addr, type);
 }
 
 void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
@@ -2363,8 +2369,11 @@ namespace {
       : Dtor(D), Addr(Addr), Type(Type) {}
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
+      // We are calling the destructor from within the constructor.
+      // Therefore, "this" should have the expected type.
+      QualType ThisTy = Dtor->getThisObjectType();
       CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false,
-                                /*Delegating=*/true, Addr);
+                                /*Delegating=*/true, Addr, ThisTy);
     }
   };
 } // end anonymous namespace
@@ -2402,31 +2411,32 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor
 void CodeGenFunction::EmitCXXDestructorCall(const CXXDestructorDecl *DD,
                                             CXXDtorType Type,
                                             bool ForVirtualBase,
-                                            bool Delegating,
-                                            Address This) {
+                                            bool Delegating, Address This,
+                                            QualType ThisTy) {
   CGM.getCXXABI().EmitDestructorCall(*this, DD, Type, ForVirtualBase,
-                                     Delegating, This);
+                                     Delegating, This, ThisTy);
 }
 
 namespace {
   struct CallLocalDtor final : EHScopeStack::Cleanup {
     const CXXDestructorDecl *Dtor;
     Address Addr;
+    QualType Ty;
 
-    CallLocalDtor(const CXXDestructorDecl *D, Address Addr)
-      : Dtor(D), Addr(Addr) {}
+    CallLocalDtor(const CXXDestructorDecl *D, Address Addr, QualType Ty)
+        : Dtor(D), Addr(Addr), Ty(Ty) {}
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
       CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
                                 /*ForVirtualBase=*/false,
-                                /*Delegating=*/false, Addr);
+                                /*Delegating=*/false, Addr, Ty);
     }
   };
 } // end anonymous namespace
 
 void CodeGenFunction::PushDestructorCleanup(const CXXDestructorDecl *D,
-                                            Address Addr) {
-  EHStack.pushCleanup<CallLocalDtor>(NormalAndEHCleanup, D, Addr);
+                                            QualType T, Address Addr) {
+  EHStack.pushCleanup<CallLocalDtor>(NormalAndEHCleanup, D, Addr, T);
 }
 
 void CodeGenFunction::PushDestructorCleanup(QualType T, Address Addr) {
@@ -2436,7 +2446,7 @@ void CodeGenFunction::PushDestructorCleanup(QualType T, Address Addr) {
 
   const CXXDestructorDecl *D = ClassDecl->getDestructor();
   assert(D && D->isUsed() && "destructor not marked as used!");
-  PushDestructorCleanup(D, Addr);
+  PushDestructorCleanup(D, T, Addr);
 }
 
 void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) {
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 19a9e75cc5ac9..6ad43cefc4d29 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -480,11 +480,12 @@ namespace {
 
   template <class Derived>
   struct DestroyNRVOVariable : EHScopeStack::Cleanup {
-    DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag)
-        : NRVOFlag(NRVOFlag), Loc(addr) {}
+    DestroyNRVOVariable(Address addr, QualType type, llvm::Value *NRVOFlag)
+        : NRVOFlag(NRVOFlag), Loc(addr), Ty(type) {}
 
     llvm::Value *NRVOFlag;
     Address Loc;
+    QualType Ty;
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
       // Along the exceptions path we always execute the dtor.
@@ -511,26 +512,24 @@ namespace {
 
   struct DestroyNRVOVariableCXX final
       : DestroyNRVOVariable<DestroyNRVOVariableCXX> {
-    DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor,
-                           llvm::Value *NRVOFlag)
-      : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, NRVOFlag),
-        Dtor(Dtor) {}
+    DestroyNRVOVariableCXX(Address addr, QualType type,
+                           const CXXDestructorDecl *Dtor, llvm::Value *NRVOFlag)
+        : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, type, NRVOFlag),
+          Dtor(Dtor) {}
 
     const CXXDestructorDecl *Dtor;
 
     void emitDestructorCall(CodeGenFunction &CGF) {
       CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
                                 /*ForVirtualBase=*/false,
-                                /*Delegating=*/false, Loc);
+                                /*Delegating=*/false, Loc, Ty);
     }
   };
 
   struct DestroyNRVOVariableC final
       : DestroyNRVOVariable<DestroyNRVOVariableC> {
     DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty)
-        : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, NRVOFlag), Ty(Ty) {}
-
-    QualType Ty;
+        : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, Ty, NRVOFlag) {}
 
     void emitDestructorCall(CodeGenFunction &CGF) {
       CGF.destroyNonTrivialCStruct(CGF, Loc, Ty);
@@ -1940,7 +1939,7 @@ void CodeGenFunction::emitAutoVarTypeCleanup(
     if (emission.NRVOFlag) {
       assert(!type->isArrayType());
       CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor();
-      EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, dtor,
+      EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, type, dtor,
                                                   emission.NRVOFlag);
       return;
     }
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 8ad229fc0c362..5476d13b7c461 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -10,12 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "CodeGenFunction.h"
 #include "CGCUDARuntime.h"
 #include "CGCXXABI.h"
 #include "CGDebugInfo.h"
 #include "CGObjCRuntime.h"
+#include "CodeGenFunction.h"
 #include "ConstantEmitter.h"
+#include "TargetInfo.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "llvm/IR/Intrinsics.h"
@@ -90,12 +91,26 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
 }
 
 RValue CodeGenFunction::EmitCXXDestructorCall(
-    GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This,
+    GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This, QualType ThisTy,
     llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE) {
+  const CXXMethodDecl *DtorDecl = cast<CXXMethodDecl>(Dtor.getDecl());
+
+  assert(!ThisTy.isNull());
+  assert(ThisTy->getAsCXXRecordDecl() == DtorDecl->getParent() &&
+         "Pointer/Object mixup");
+
+  LangAS SrcAS = ThisTy.getAddressSpace();
+  LangAS DstAS = DtorDecl->getMethodQualifiers().getAddressSpace();
+  if (SrcAS != DstAS) {
+    QualType DstTy = DtorDecl->getThisType();
+    llvm::Type *NewType = CGM.getTypes().ConvertType(DstTy);
+    This = getTargetHooks().performAddrSpaceCast(*this, This, SrcAS, DstAS,
+                                                 NewType);
+  }
+
   CallArgList Args;
-  commonEmitCXXMemberOrOperatorCall(*this, cast<CXXMethodDecl>(Dtor.getDecl()),
-                                    This, ImplicitParam, ImplicitParamTy, CE,
-                                    Args, nullptr);
+  commonEmitCXXMemberOrOperatorCall(*this, DtorDecl, This, ImplicitParam,
+                                    ImplicitParamTy, CE, Args, nullptr);
   return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee,
                   ReturnValueSlot(), Args);
 }
@@ -345,7 +360,9 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
         Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(GD, Ty), GD);
       }
 
-      EmitCXXDestructorCall(GD, Callee, This.getPointer(),
+      QualType ThisTy =
+          IsArrow ? Base->getType()->getPointeeType() : Base->getType();
+      EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
                             /*ImplicitParam=*/nullptr,
                             /*ImplicitParamTy=*/QualType(), nullptr);
     }
@@ -1883,7 +1900,7 @@ static void EmitObjectDelete(CodeGenFunction &CGF,
     CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
                               /*ForVirtualBase=*/false,
                               /*Delegating=*/false,
-                              Ptr);
+                              Ptr, ElementType);
   else if (auto Lifetime = ElementType.getObjCLifetime()) {
     switch (Lifetime) {
     case Qualifiers::OCL_None:
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 06ef2dff7e9f5..c3060d1fb3514 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -675,7 +675,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// PushDestructorCleanup - Push a cleanup to call the
   /// complete-object variant of the given destructor on the object at
   /// the given address.
-  void PushDestructorCleanup(const CXXDestructorDecl *Dtor, Address Addr);
+  void PushDestructorCleanup(const CXXDestructorDecl *Dtor, QualType T,
+                             Address Addr);
 
   /// PopCleanupBlock - Will pop the cleanup entry on the stack and
   /// process all branch fixups.
@@ -2554,8 +2555,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   static Destroyer destroyCXXObject;
 
   void EmitCXXDestructorCall(const CXXDestructorDecl *D, CXXDtorType Type,
-                             bool ForVirtualBase, bool Delegating,
-                             Address This);
+                             bool ForVirtualBase, bool Delegating, Address This,
+                             QualType ThisTy);
 
   void EmitNewArrayInitializer(const CXXNewExpr *E, QualType elementType,
                                llvm::Type *ElementTy, Address NewPtr,
@@ -3677,9 +3678,9 @@ class CodeGenFunction : public CodeGenTypeCache {
                               llvm::Value *ImplicitParam,
                               QualType ImplicitParamTy, const CallExpr *E,
                               CallArgList *RtlArgs);
-  RValue EmitCXXDestructorCall(GlobalDecl Dtor,
-                               const CGCallee &Callee,
-                               llvm::Value *This, llvm::Value *ImplicitParam,
+  RValue EmitCXXDestructorCall(GlobalDecl Dtor, const CGCallee &Callee,
+                               llvm::Value *This, QualType ThisTy,
+                               llvm::Value *ImplicitParam,
                                QualType ImplicitParamTy, const CallExpr *E);
   RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E,
                                ReturnValueSlot ReturnValue);
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 7367ff37cf45c..3b2413d960d63 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -224,7 +224,8 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
 
   void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
                           CXXDtorType Type, bool ForVirtualBase,
-                          bool Delegating, Address This) override;
+                          bool Delegating, Address This,
+                          QualType ThisTy) override;
 
   void emitVTableDefinitions(CodeGenVTables &CGVT,
                              const CXXRecordDecl *RD) override;
@@ -261,9 +262,8 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
 
   llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
                                          const CXXDestructorDecl *Dtor,
-                                         CXXDtorType DtorType,
-                                         Address This,
-                                         const CXXMemberCallExpr *CE) override;
+                                         CXXDtorType DtorType, Address This,
+                                         DeleteOrMemberCallExpr E) override;
 
   void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
 
@@ -1128,7 +1128,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
   // FIXME: Provide a source location here even though there's no
   // CXXMemberCallExpr for dtor call.
   CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
-  EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
+  EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE);
 
   if (UseGlobalDelete)
     CGF.PopCleanupBlock();
@@ -1539,7 +1539,8 @@ CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
 void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
                                        const CXXDestructorDecl *DD,
                                        CXXDtorType Type, bool ForVirtualBase,
-                                       bool Delegating, Address This) {
+                                       bool Delegating, Address This,
+                                       QualType ThisTy) {
   GlobalDecl GD(DD, Type);
   llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating);
   QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
@@ -1551,7 +1552,8 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
   else
     Callee = CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD);
 
-  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), VTT, VTTTy, nullptr);
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, VTT, VTTTy,
+                            nullptr);
 }
 
 void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -1739,7 +1741,10 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
 
 llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
     CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
-    Address This, const CXXMemberCallExpr *CE) {
+    Address This, DeleteOrMemberCallExpr E) {
+  auto *CE = E.dyn_cast<const CXXMemberCallExpr *>();
+  auto *D = E.dyn_cast<const CXXDeleteExpr *>();
+  assert((CE != nullptr) ^ (D != nullptr));
   assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
   assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
 
@@ -1749,8 +1754,14 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
   llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
   CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);
 
-  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), nullptr, QualType(),
-                            nullptr);
+  QualType ThisTy;
+  if (CE)
+    ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
+  else
+    ThisTy = D->getDestroyedType();
+
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, nullptr,
+                            QualType(), nullptr);
   return nullptr;
 }
 
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index a91a949d024f8..fa34414de5da1 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -258,7 +258,8 @@ class MicrosoftCXXABI : public CGCXXABI {
 
   void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
                           CXXDtorType Type, bool ForVirtualBase,
-                          bool Delegating, Address This) override;
+                          bool Delegating, Address This,
+                          QualType ThisTy) override;
 
   void emitVTableTypeMetadata(const VPtrInfo &Info, const CXXRecordDecl *RD,
                               llvm::GlobalVariable *VTable);
@@ -296,9 +297,8 @@ class MicrosoftCXXABI : public CGCXXABI {
 
   llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
                                          const CXXDestructorDecl *Dtor,
-                                         CXXDtorType DtorType,
-                                         Address This,
-                                         const CXXMemberCallExpr *CE) override;
+                                         CXXDtorType DtorType, Address This,
+                                         DeleteOrMemberCallExpr E) override;
 
   void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD,
                                         CallArgList &CallArgs) override {
@@ -844,8 +844,7 @@ void MicrosoftCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
   // CXXMemberCallExpr for dtor call.
   bool UseGlobalDelete = DE->isGlobalDelete();
   CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
-  llvm::Value *MDThis =
-      EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
+  llvm::Value *MDThis = EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE);
   if (UseGlobalDelete)
     CGF.EmitDeleteCall(DE->getOperatorDelete(), MDThis, ElementType);
 }
@@ -1569,7 +1568,8 @@ CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
 void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
                                          const CXXDestructorDecl *DD,
                                          CXXDtorType Type, bool ForVirtualBase,
-                                         bool Delegating, Address This) {
+                                         bool Delegating, Address This,
+                                         QualType ThisTy) {
   // Use the base destructor variant in place of the complete destructor variant
   // if the class has no virtual bases. This effectively implements some of the
   // -mconstructor-aliases optimization, but as part of the MS C++ ABI.
@@ -1591,7 +1591,7 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
     BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF);
   }
 
-  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(),
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
                             /*ImplicitParam=*/nullptr,
                             /*ImplicitParamTy=*/QualType(), nullptr);
   if (BaseDtorEndBB) {
@@ -1900,7 +1900,10 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
 
 llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
     CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
-    Address This, const CXXMemberCallExpr *CE) {
+    Address This, DeleteOrMemberCallExpr E) {
+  auto *CE = E.dyn_cast<const CXXMemberCallExpr *>();
+  auto *D = E.dyn_cast<const CXXDeleteExpr *>();
+  assert((CE != nullptr) ^ (D != nullptr));
   assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
   assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
 
@@ -1917,8 +1920,14 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
       llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
       DtorType == Dtor_Deleting);
 
+  QualType ThisTy;
+  if (CE)
+    ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
+  else
+    ThisTy = D->getDestroyedType();
+
   This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
-  RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(),
+  RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
                                         ImplicitParam, Context.IntTy, CE);
   return RV.getScalarVal();
 }
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index dd77fc55721fb..9a6385f283196 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -8190,6 +8190,27 @@ void Sema::ActOnFinishDelayedCXXMethodDeclaration(Scope *S, Decl *MethodD) {
     CheckCXXDefaultArguments(Method);
 }
 
+// Emit the given diagnostic for each non-address-space qualifier.
+// Common part of CheckConstructorDeclarator and CheckDestructorDeclarator.
+static void checkMethodTypeQualifiers(Sema &S, Declarator &D, unsigned DiagID) {
+  const DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
+  if (FTI.hasMethodTypeQualifiers() && !D.isInvalidType()) {
+    bool DiagOccured = false;
+    FTI.MethodQualifiers->forEachQualifier(
+        [DiagID, &S, &DiagOccured](DeclSpec::TQ, StringRef QualName,
+                                   SourceLocation SL) {
+          // This diagnostic should be emitted on any qualifier except an addr
+          // space qualifier. However, forEachQualifier currently doesn't visit
+          // addr space qualifiers, so there's no way to write this condition
+          // right now; we just diagnose on everything.
+          S.Diag(SL, DiagID) << QualName << SourceRange(SL);
+          DiagOccured = true;
+        });
+    if (DiagOccured)
+      D.setInvalidType();
+  }
+}
+
 /// CheckConstructorDeclarator - Called by ActOnDeclarator to check
 /// the well-formedness of the constructor declarator @p D with type @p
 /// R. If there are any errors in the declarator, this routine will
@@ -8230,25 +8251,11 @@ QualType Sema::CheckConstructorDeclarator(Declarator &D, QualType R,
     D.setInvalidType();
   }
 
-  DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
-  if (FTI.hasMethodTypeQualifiers()) {
-    bool DiagOccured = false;
-    FTI.MethodQualifiers->forEachQualifier(
-        [&](DeclSpec::TQ TypeQual, StringRef QualName, SourceLocation SL) {
-          // This diagnostic should be emitted on any qualifier except an addr
-          // space qualifier. However, forEachQualifier currently doesn't visit
-          // addr space qualifiers, so there's no way to write this condition
-          // right now; we just diagnose on everything.
-          Diag(SL, diag::err_invalid_qualified_constructor)
-              << QualName << SourceRange(SL);
-          DiagOccured = true;
-        });
-    if (DiagOccured)
-      D.setInvalidType();
-  }
+  checkMethodTypeQualifiers(*this, D, diag::err_invalid_qualified_constructor);
 
   // C++0x [class.ctor]p4:
   //   A constructor shall not be declared with a ref-qualifier.
+  DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
   if (FTI.hasRefQualifier()) {
     Diag(FTI.getRefQualifierLoc(), diag::err_ref_qualifier_constructor)
       << FTI.RefQualifierIsLValueRef
@@ -8423,18 +8430,11 @@ QualType Sema::CheckDestructorDeclarator(Declarator &D, QualType R,
     }
   }
 
-  DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
-  if (FTI.hasMethodTypeQualifiers() && !D.isInvalidType()) {
-    FTI.MethodQualifiers->forEachQualifier(
-        [&](DeclSpec::TQ TypeQual, StringRef QualName, SourceLocation SL) {
-          Diag(SL, diag::err_invalid_qualified_destructor)
-              << QualName << SourceRange(SL);
-        });
-    D.setInvalidType();
-  }
+  checkMethodTypeQualifiers(*this, D, diag::err_invalid_qualified_destructor);
 
   // C++0x [class.dtor]p2:
   //   A destructor shall not be declared with a ref-qualifier.
+  DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
   if (FTI.hasRefQualifier()) {
     Diag(FTI.getRefQualifierLoc(), diag::err_ref_qualifier_destructor)
       << FTI.RefQualifierIsLValueRef
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index d8c4ea48ebce7..f632a4d3bd1a7 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -5093,12 +5093,10 @@ TryObjectArgumentInitialization(Sema &S, SourceLocation Loc, QualType FromType,
   QualType ClassType = S.Context.getTypeDeclType(ActingContext);
   // [class.dtor]p2: A destructor can be invoked for a const, volatile or
   //                 const volatile object.
-  Qualifiers Quals;
+  Qualifiers Quals = Method->getMethodQualifiers();
   if (isa<CXXDestructorDecl>(Method)) {
     Quals.addConst();
     Quals.addVolatile();
-  } else {
-    Quals = Method->getMethodQualifiers();
   }
 
   QualType ImplicitParamType = S.Context.getQualifiedType(ClassType, Quals);
diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl b/clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl
deleted file mode 100644
index 42c2e6e9077a8..0000000000000
--- a/clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl
+++ /dev/null
@@ -1,14 +0,0 @@
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -emit-llvm -O0 -o - | FileCheck %s
-
-struct MyType {
-  MyType(int i) : i(i) {}
-  MyType(int i) __constant : i(i) {}
-  int i;
-};
-
-//CHECK: call void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* @const1, i32 1)
-__constant MyType const1 = 1;
-//CHECK: call void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* @const2, i32 2)
-__constant MyType const2(2);
-//CHECK: call void @_ZNU3AS46MyTypeC1Ei(%struct.MyType addrspace(4)* addrspacecast (%struct.MyType addrspace(1)* @glob to %struct.MyType addrspace(4)*), i32 1)
-MyType glob(1);
diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-with-class.cl b/clang/test/CodeGenOpenCLCXX/addrspace-with-class.cl
new file mode 100644
index 0000000000000..21ba1ca251d86
--- /dev/null
+++ b/clang/test/CodeGenOpenCLCXX/addrspace-with-class.cl
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -emit-llvm -O0 -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -emit-llvm -O0 -o - | FileCheck %s --check-prefix=CHECK-DEFINITIONS
+
+// This test ensures the proper address spaces and address space cast are used
+// for constructors, member functions and destructors.
+// See also atexit.cl and global_init.cl for other specific tests.
+
+// CHECK: %struct.MyType = type { i32 }
+struct MyType {
+  MyType(int i) : i(i) {}
+  MyType(int i) __constant : i(i) {}
+  ~MyType() {}
+  ~MyType() __constant {}
+  int bar() { return i + 2; }
+  int bar() __constant { return i + 1; }
+  int i;
+};
+
+// CHECK: @const1 = addrspace(2) global %struct.MyType zeroinitializer
+__constant MyType const1 = 1;
+// CHECK: @const2 = addrspace(2) global %struct.MyType zeroinitializer
+__constant MyType const2(2);
+// CHECK: @glob = addrspace(1) global %struct.MyType zeroinitializer
+MyType glob(1);
+
+// CHECK: call void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* @const1, i32 1)
+// CHECK: call void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* @const2, i32 2)
+// CHECK: call void @_ZNU3AS46MyTypeC1Ei(%struct.MyType addrspace(4)* addrspacecast (%struct.MyType addrspace(1)* @glob to %struct.MyType addrspace(4)*), i32 1)
+
+// CHECK-LABEL: define spir_kernel void @fooGlobal()
+kernel void fooGlobal() {
+  // CHECK: call i32 @_ZNU3AS46MyType3barEv(%struct.MyType addrspace(4)* addrspacecast (%struct.MyType addrspace(1)* @glob to %struct.MyType addrspace(4)*))
+  glob.bar();
+  // CHECK: call i32 @_ZNU3AS26MyType3barEv(%struct.MyType addrspace(2)* @const1)
+  const1.bar();
+  // CHECK: call void @_ZNU3AS26MyTypeD1Ev(%struct.MyType addrspace(2)* @const1)
+  const1.~MyType();
+}
+
+// CHECK-LABEL: define spir_kernel void @fooLocal()
+kernel void fooLocal() {
+  // CHECK: [[VAR:%.*]] = alloca %struct.MyType
+  // CHECK: [[REG:%.*]] = addrspacecast %struct.MyType* [[VAR]] to %struct.MyType addrspace(4)*
+  // CHECK: call void @_ZNU3AS46MyTypeC1Ei(%struct.MyType addrspace(4)* [[REG]], i32 3)
+  MyType myLocal(3);
+  // CHECK: [[REG:%.*]] = addrspacecast %struct.MyType* [[VAR]] to %struct.MyType addrspace(4)*
+  // CHECK: call i32 @_ZNU3AS46MyType3barEv(%struct.MyType addrspace(4)* [[REG]])
+  myLocal.bar();
+  // CHECK: [[REG:%.*]] = addrspacecast %struct.MyType* [[VAR]] to %struct.MyType addrspace(4)*
+  // CHECK: call void @_ZNU3AS46MyTypeD1Ev(%struct.MyType addrspace(4)* [[REG]])
+}
+
+// Ensure all members are defined for all the required address spaces.
+// CHECK-DEFINITIONS-DAG: define linkonce_odr void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* %this, i32 %i)
+// CHECK-DEFINITIONS-DAG: define linkonce_odr void @_ZNU3AS46MyTypeC1Ei(%struct.MyType addrspace(4)* %this, i32 %i)
+// CHECK-DEFINITIONS-DAG: define linkonce_odr void @_ZNU3AS26MyTypeD1Ev(%struct.MyType addrspace(2)* %this)
+// CHECK-DEFINITIONS-DAG: define linkonce_odr void @_ZNU3AS46MyTypeD1Ev(%struct.MyType addrspace(4)* %this)
+// CHECK-DEFINITIONS-DAG: define linkonce_odr i32 @_ZNU3AS26MyType3barEv(%struct.MyType addrspace(2)* %this)
+// CHECK-DEFINITIONS-DAG: define linkonce_odr i32 @_ZNU3AS46MyType3barEv(%struct.MyType addrspace(4)* %this)

From 11512e742b283a2845f1afa6242c63efcd2ac102 Mon Sep 17 00:00:00 2001
From: "Diogo N. Sampaio" <diogo.sampaio@arm.com>
Date: Thu, 18 Jul 2019 10:05:56 +0000
Subject: [PATCH 439/451] [ARM][DAGCOMBINE][FIX] PerformVMOVRRDCombine

Summary:
PerformVMOVRRDCombine omits adding an offset
of 4 to the PointerInfo when converting a
f64 = load[M]
to
{i32, i32} = {load[M], load[M + 4]}

This omission would allow the machine scheduler
to break dependencies with the second load.

 - pr42638

Reviewers: eli.friedman, dmgreen, ostannard

Reviewed By: ostannard

Subscribers: ostannard, javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64870

llvm-svn: 366423
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  8 +++--
 .../CodeGen/ARM/pr42638-VMOVRRDCombine.ll     | 33 +++++++++++++++++++
 2 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 09b78115f2e3c..18bb9bf3eccc6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -11748,9 +11748,11 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
 
     SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                     DAG.getConstant(4, DL, MVT::i32));
-    SDValue NewLD2 = DAG.getLoad(
-        MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(),
-        std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags());
+
+    SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
+                                 LD->getPointerInfo().getWithOffset(4),
+                                 std::min(4U, LD->getAlignment()),
+                                 LD->getMemOperand()->getFlags());
 
     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
     if (DCI.DAG.getDataLayout().isBigEndian())
diff --git a/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll b/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll
new file mode 100644
index 0000000000000..aac5de4ce5e3c
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/pr42638-VMOVRRDCombine.ll
@@ -0,0 +1,33 @@
+; RUN: llc -stop-after=machine-scheduler -debug-only dagcombine,selectiondag -o - %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; pr42638
+target triple = "armv8r-arm-none-eabi"
+%struct.__va_list = type { i8* }
+define double @foo(i32 %P0, ...) #0 {
+entry:
+  %V1 = alloca [8 x i8], align 8
+  %vl = alloca %struct.__va_list, align 4
+  %0 = getelementptr inbounds [8 x i8], [8 x i8]* %V1, i32 0, i32 0
+  call void asm sideeffect "", "r"(i8* nonnull %0)
+  %1 = bitcast %struct.__va_list* %vl to i8*
+  call void @llvm.va_start(i8* nonnull %1)
+  %2 = bitcast %struct.__va_list* %vl to double**
+  %argp.cur3 = load double*, double** %2, align 4
+  %v.sroa.0.0.copyload = load double, double* %argp.cur3, align 4
+  ret double %v.sroa.0.0.copyload
+}
+
+declare void @llvm.va_start(i8*)
+
+attributes #0 = { "target-cpu"="cortex-r52" "target-features"="-fp64"  }
+
+; Ensures that the machine scheduler does not move the load of the upper
+; 32 bits of the double to before the store that writes them to memory
+
+; CHECK: Creating new node: {{.*}} = add FrameIndex:i32<2>, Constant:i32<4>
+; CHECK-NEXT: Creating new node: {{.*}} i32,ch = load<(load 4 from [[MEM:%.*]] + 4)>
+; CHECK: INLINEASM
+; CHECK: (load 4 from [[MEM]] + 4)
+; CHECK-NOT: (store 4 into [[MEM]] + 4)
+
+

From 782390258b5cfcde078b71513f21fd05fc59cb34 Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Thu, 18 Jul 2019 10:43:07 +0000
Subject: [PATCH 440/451] [ELF][PPC] Refactor some ppc64 tests

Merge ppc64-dynamic-relocations.s into ppc64-plt-stub.s
Add ppc64-tls-ie.s: covers ppc64-initial-exec-tls.s and ppc64-tls-ie-le.s
Add ppc64-tls-gd.s: covers ppc64-general-dynamic-tls.s, ppc64-gd-to-ie.s, ppc64-tls-gd-le.s, and ppc64-tls-gd-le-small.s

llvm-svn: 366424
---
 lld/ELF/Arch/PPC64.cpp                   |   1 +
 lld/test/ELF/ppc64-gd-to-ie.s            | 100 -------------
 lld/test/ELF/ppc64-general-dynamic-tls.s | 112 ---------------
 lld/test/ELF/ppc64-initial-exec-tls.s    | 102 --------------
 lld/test/ELF/ppc64-plt-stub.s            |  59 ++++----
 lld/test/ELF/ppc64-tls-gd-le-small.s     |  61 --------
 lld/test/ELF/ppc64-tls-gd-le.s           |  79 -----------
 lld/test/ELF/ppc64-tls-gd.s              | 102 ++++++++++++++
 lld/test/ELF/ppc64-tls-ie-le.s           | 140 -------------------
 lld/test/ELF/ppc64-tls-ie.s              | 171 +++++++++++++++++++++++
 10 files changed, 308 insertions(+), 619 deletions(-)
 delete mode 100644 lld/test/ELF/ppc64-gd-to-ie.s
 delete mode 100644 lld/test/ELF/ppc64-general-dynamic-tls.s
 delete mode 100644 lld/test/ELF/ppc64-initial-exec-tls.s
 delete mode 100644 lld/test/ELF/ppc64-tls-gd-le-small.s
 delete mode 100644 lld/test/ELF/ppc64-tls-gd-le.s
 create mode 100644 lld/test/ELF/ppc64-tls-gd.s
 delete mode 100644 lld/test/ELF/ppc64-tls-ie-le.s
 create mode 100644 lld/test/ELF/ppc64-tls-ie.s

diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index 96b829101f8d3..70d284cfad713 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -946,6 +946,7 @@ void PPC64::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
     //                      addis rT, r2, sym@got@tprel@ha.
     relocateOne(loc, R_PPC64_GOT_TPREL16_HA, val);
     return;
+  case R_PPC64_GOT_TLSGD16:
   case R_PPC64_GOT_TLSGD16_LO: {
     // Relax from addi  r3, rA, sym@got@tlsgd@l to
     //            ld r3, sym@got@tprel@l(rA)
diff --git a/lld/test/ELF/ppc64-gd-to-ie.s b/lld/test/ELF/ppc64-gd-to-ie.s
deleted file mode 100644
index a11024a58d4cd..0000000000000
--- a/lld/test/ELF/ppc64-gd-to-ie.s
+++ /dev/null
@@ -1,100 +0,0 @@
-# REQUIRES: ppc
-
-# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
-# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-tls.s -o %t2.o
-# RUN: ld.lld -shared %t2.o -o %t3.so
-# RUN: ld.lld  %t.o %t3.so -o %t
-# RUN: llvm-objdump --section-headers %t | FileCheck --check-prefix=CheckGot %s
-# RUN: llvm-objdump -d %t | FileCheck --check-prefix=Dis %s
-# RUN: llvm-readelf -r %t | FileCheck --check-prefix=OutputRelocs %s
-
-# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
-# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-tls.s -o %t2.o
-# RUN: ld.lld -shared %t2.o -o %t3.so
-# RUN: ld.lld  %t.o %t3.so -o %t
-# RUN: llvm-objdump --section-headers %t | FileCheck --check-prefix=CheckGot %s
-# RUN: llvm-objdump -d %t | FileCheck --check-prefix=Dis %s
-# RUN: llvm-readelf -r %t | FileCheck --check-prefix=OutputRelocs %s
-
-        .text
-        .abiversion 2
-        .globl _start
-        .p2align        4
-        .type   _start,@function
-_start:
-.Lfunc_gep0:
-        addis 2, 12, .TOC.-.Lfunc_gep0@ha
-        addi 2, 2, .TOC.-.Lfunc_gep0@l
-.Lfunc_lep0:
-        .localentry     _start, .Lfunc_lep0-.Lfunc_gep0
-        mflr 0
-        std 0, 16(1)
-        stdu 1, -32(1)
-        addis 3, 2, a@got@tlsgd@ha
-        addi 3, 3, a@got@tlsgd@l
-        bl __tls_get_addr(a@tlsgd)
-        nop
-        lwa 3, 0(3)
-        addi 1, 1, 32
-        ld 0, 16(1)
-        mtlr 0
-        blr
-
-
-        .globl other_reg
-        .p2align        4
-        .type   other_reg,@function
-other_reg:
-.Lfunc_gep1:
-        addis 2, 12, .TOC.-.Lfunc_gep1@ha
-        addi 2, 2, .TOC.-.Lfunc_gep1@l
-.Lfunc_lep1:
-        .localentry     other_reg, .Lfunc_lep1-.Lfunc_gep1
-        mflr 0
-        std 0, 16(1)
-        stdu 1, -32(1)
-        addis 5, 2, a@got@tlsgd@ha
-        addi 3, 5, a@got@tlsgd@l
-        bl __tls_get_addr(a@tlsgd)
-        nop
-        lwa 4, 0(3)
-        addis 30, 2, b@got@tlsgd@ha
-        addi 3, 30, b@got@tlsgd@l
-        bl __tls_get_addr(b@tlsgd)
-        nop
-        lwa 3, 0(3)
-        add 3, 4, 3
-        addi 1, 1, 32
-        ld 0, 16(1)
-        mtlr 0
-        blr
-
-
-# CheckGot: .got          00000018 00000000100200c0 DATA
-# .got is at 0x100200c0 so the toc-base is 100280c0.
-# `a` is at .got[1], we expect the offsets to be:
-# Ha(a) = ((0x100200c8  - 0x100280c0) + 0x8000) >> 16 = 0
-# Lo(a) = (0x100200c8  - 0x100280c0) = -32760
-
-# Dis-LABEL: _start
-# Dis:         addis 3, 2, 0
-# Dis-NEXT:    ld 3, -32760(3)
-# Dis-NEXT:    nop
-# Dis-NEXT:    add 3, 3, 13
-
-# Dis-LABEL: other_reg
-# Dis:         addis 5, 2, 0
-# Dis-NEXT:    ld 3, -32760(5)
-# Dis-NEXT:    nop
-# Dis-NEXT:    add 3, 3, 13
-# Dis:         addis 30, 2, 0
-# Dis:         ld 3, -32752(30)
-# Dis-NEXT:    nop
-# Dis-NEXT:    add 3, 3, 13
-
-# Verify that the only dynamic relocations we emit are TPREL ones rather then
-# the DTPMOD64/DTPREL64 pair for general-dynamic.
-# OutputRelocs: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 2 entries:
-# OutputRelocs-NEXT:    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
-# OutputRelocs-NEXT:  {{[0-9a-f]+}}    {{[0-9a-f]+}}   R_PPC64_TPREL64        {{[0-9a-f]+}} a + 0
-# OutputRelocs-NEXT:  {{[0-9a-f]+}}    {{[0-9a-f]+}}   R_PPC64_TPREL64        {{[0-9a-f]+}} b + 0
diff --git a/lld/test/ELF/ppc64-general-dynamic-tls.s b/lld/test/ELF/ppc64-general-dynamic-tls.s
deleted file mode 100644
index 8ae25a91a0876..0000000000000
--- a/lld/test/ELF/ppc64-general-dynamic-tls.s
+++ /dev/null
@@ -1,112 +0,0 @@
-// REQUIRES: ppc
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
-// RUN: ld.lld -shared %t.o -o %t.so
-// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
-// RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=OutputRelocs %s
-// RUN: llvm-objdump --section-headers %t.so | FileCheck --check-prefix=CheckGot %s
-// RUN: llvm-objdump -d %t.so | FileCheck --check-prefix=Dis %s
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
-// RUN: ld.lld -shared %t.o -o %t.so
-// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
-// RUN: llvm-readelf -r %t.so | FileCheck --check-prefix=OutputRelocs %s
-// RUN: llvm-objdump --section-headers %t.so | FileCheck --check-prefix=CheckGot %s
-// RUN: llvm-objdump -d %t.so | FileCheck --check-prefix=Dis %s
-
-	.text
-	.abiversion 2
-	.globl	test
-	.p2align	4
-	.type	test,@function
-test:
-.Lfunc_gep0:
-	addis 2, 12, .TOC.-.Lfunc_gep0@ha
-	addi 2, 2, .TOC.-.Lfunc_gep0@l
-.Lfunc_lep0:
-	.localentry	test, .Lfunc_lep0-.Lfunc_gep0
-	mflr 0
-	std 31, -8(1)
-	std 0, 16(1)
-	stdu 1, -48(1)
-	mr 31, 1
-	std 30, 32(31)
-	addis 3, 2, i@got@tlsgd@ha
-	addi 3, 3, i@got@tlsgd@l
-	bl __tls_get_addr(i@tlsgd)
-	nop
-	lwz 30, 0(3)
-	extsw 3, 30
-	ld 30, 32(31)
-	addi 1, 1, 48
-	ld 0, 16(1)
-	ld 31, -8(1)
-	mtlr 0
-	blr
-
-
-test_hi:
-.Lfunc_gep1:
-  addis 2, 12, .TOC.-.Lfunc_gep1@ha
-  addi  2, 2,  .TOC.-.Lfunc_gep1@l
-.Lfunc_lep1:
-  .localentry test2, .Lfunc_lep1-.Lfunc_gep1
-  addis 3, 0, j@got@tlsgd@h
-  blr
-
-test_16:
-.Lfunc_gep2:
-  addis 2, 12, .TOC.-.Lfunc_gep2@ha
-  addi 2, 2, .TOC.-.Lfunc_gep2@l
-.Lfunc_lep2:
-  .localentry test16, .Lfunc_lep2-.Lfunc_gep2
-  addi 3, 0, k@got@tlsgd
-  blr
-
-// Verify that the input has every general-dynamic tls relocation type.
-// InputRelocs:  Relocation section '.rela.text'
-// InputRelocs: R_PPC64_GOT_TLSGD16_HA  {{0+}}  i + 0
-// InputRelocs: R_PPC64_GOT_TLSGD16_LO  {{0+}}  i + 0
-// InputRelocs: R_PPC64_TLSGD           {{0+}}  i + 0
-// InputRelocs: R_PPC64_GOT_TLSGD16_HI  {{0+}}  j + 0
-// InputRelocs: R_PPC64_GOT_TLSGD16     {{0+}}  k + 0
-
-// There is 2 got entries for each tls variable that is accessed with the
-// general-dynamic model.  The entries can be though of as a structure to be
-// filled in by the dynamic linker:
-// typedef struct {
-//  unsigned long int ti_module; --> R_PPC64_DTPMOD64
-//  unsigned long int ti_offset; --> R_PPC64_DTPREL64
-//} tls_index;
-// OutputRelocs: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 6 entries:
-// OutputRelocs: R_PPC64_DTPMOD64  {{0+}}  i + 0
-// OutputRelocs: R_PPC64_DTPREL64  {{0+}}  i + 0
-// OutputRelocs: R_PPC64_DTPMOD64  {{0+}}  j + 0
-// OutputRelocs: R_PPC64_DTPREL64  {{0+}}  j + 0
-// OutputRelocs: R_PPC64_DTPMOD64  {{0+}}  k + 0
-// OutputRelocs: R_PPC64_DTPREL64  {{0+}}  k + 0
-
-// Check that the got has 7 entires. (1 for the TOC and 3 structures of
-// 2 entries for the tls variables). Also verify the address so we can check
-// the offsets we calculated for each relocation type.
-// CheckGot: got          00000038 00000000000200f0
-
-// got starts at 0x200f0, so .TOC. will be 0x280f0.
-
-// We are building the address of the first tls_index in the got which starts at
-// 0x200f8 (got[1]).
-// #ha(i@got@tlsgd) --> (0x200f8 - 0x280f0 + 0x8000) >> 16 = 0
-// #lo(i@got@tlsgd) --> (0x200f8 - 0x280f0) & 0xFFFF =  -7ff8 = -32760
-// Dis:  test:
-// Dis:    addis 3, 2, 0
-// Dis:    addi 3, 3, -32760
-
-// Second tls_index starts at got[3].
-// #hi(j@got@tlsgd) --> (0x20108 - 0x280f0) >> 16 = -1
-// Dis: test_hi:
-// Dis:   lis 3, -1
-
-// Third tls index is at got[5].
-// k@got@tlsgd --> (0x20118 -  0x280f0) = -0x7fd8 = -32728
-// Dis: test_16:
-// Dis:   li 3, -32728
diff --git a/lld/test/ELF/ppc64-initial-exec-tls.s b/lld/test/ELF/ppc64-initial-exec-tls.s
deleted file mode 100644
index 9cdd3e4375c40..0000000000000
--- a/lld/test/ELF/ppc64-initial-exec-tls.s
+++ /dev/null
@@ -1,102 +0,0 @@
-// REQUIRES: ppc
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
-// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-tls.s -o %t2.o
-// RUN: ld.lld -shared %t2.o -o %t2.so
-// RUN: ld.lld -dynamic-linker /lib64/ld64.so.2 %t.o %t2.so -o %t
-// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
-// RUN: llvm-readelf -r %t | FileCheck --check-prefix=OutputRelocs %s
-// RUN: llvm-objdump --section-headers %t | FileCheck --check-prefix=CheckGot %s
-// RUN: llvm-objdump -d %t | FileCheck --check-prefix=Dis %s
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
-// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-tls.s -o %t2.o
-// RUN: ld.lld -shared %t2.o -o %t2.so
-// RUN: ld.lld -dynamic-linker /lib64/ld64.so.2 %t.o %t2.so -o %t
-// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
-// RUN: llvm-readelf -r %t | FileCheck --check-prefix=OutputRelocs %s
-// RUN: llvm-objdump --section-headers %t | FileCheck --check-prefix=CheckGot %s
-// RUN: llvm-objdump -d %t | FileCheck --check-prefix=Dis %s
-
-	.text
-	.abiversion 2
-	.file	"intial_exec.c"
-	.globl	test_initial_exec                    # -- Begin function test_initial_exec
-	.p2align	4
-	.type	test_initial_exec,@function
-test_initial_exec:                                   # @test_initial_exec
-.Lfunc_begin0:
-.Lfunc_gep0:
-	addis 2, 12, .TOC.-.Lfunc_gep0@ha
-	addi 2, 2, .TOC.-.Lfunc_gep0@l
-.Lfunc_lep0:
-	.localentry	test_initial_exec, .Lfunc_lep0-.Lfunc_gep0
-# %bb.0:                                # %entry
-	li 3, 0
-	stw 3, -12(1)
-	addis 3, 2, a@got@tprel@ha
-	ld 3, a@got@tprel@l(3)
-	lwzx 4, 3, a@tls
-	extsw 3, 4
-	blr
-
-
-test_hi:
-.Lfunc_gep1:
-  addis 2, 12, .TOC.-.Lfunc_gep1@ha
-  addi  2, 2,  .TOC.-.Lfunc_gep1@l
-.Lfunc_lep1:
-  .localentry test2, .Lfunc_lep1-.Lfunc_gep1
-  addis 3, 0, b@got@tprel@h
-  blr
-
-test_ds:
-.Lfunc_gep2:
-  addis 2, 12, .TOC.-.Lfunc_gep2@ha
-  addi 2, 2, .TOC.-.Lfunc_gep2@l
-.Lfunc_lep2:
-  .localentry test16, .Lfunc_lep2-.Lfunc_gep2
-  addi 3, 0, c@got@tprel
-  blr
-
-// Verify that the input has every initial-exec tls relocation type.
-// InputRelocs: Relocation section '.rela.text'
-// InputRelocs: R_PPC64_GOT_TPREL16_HA {{0+}} a + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_LO_DS {{0+}} a + 0
-// InputRelocs: R_PPC64_TLS {{0+}} a + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_HI {{0+}} b + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_DS {{0+}} c + 0
-
-// There is a got entry for each tls variable that is accessed with the
-// initial-exec model to be filled in by the dynamic linker.
-// OutputRelocs: Relocation section '.rela.dyn' at offset 0x{{[0-9a-f]+}} contains 3 entries:
-// OutputRelocs: R_PPC64_TPREL64  {{0+}}  a + 0
-// OutputRelocs: R_PPC64_TPREL64  {{0+}}  b + 0
-// OutputRelocs: R_PPC64_TPREL64  {{0+}}  c + 0
-
-// Check that the got has 4 entires. (1 for the TOC and 3 entries for TLS
-// variables). Also verify the address so we can check
-// the offsets we calculated for each relocation type.
-// CheckGot: got          00000020 00000000100200c0
-
-// GOT stats at 0x100200c0, so TOC will be 0x100280c0
-
-// We are building the address of the first TLS got entry which contains the
-// offset of the tls variable relative to the thread pointer.
-// 0x100200c8 (got[1]).
-// #ha(a@got@tprel) --> (0x100200c8 - 0x100280c0 + 0x8000) >> 16 = 0
-// #lo(a@got@tprel)) --> (0x100200c8 - 0x100280c0) & 0xFFFF =  -7ff8 = -32760
-// Dis:  test_initial_exec:
-// Dis:    addis 3, 2, 0
-// Dis:    ld 3, -32760(3)
-// Dis:    lwzx 4, 3, 13
-
-// Second TLS got entry starts at got[2] 0x100200d0
-// #hi(b@got@tprel) --> (0x100200d0 - 0x100280c0) >> 16 = -1
-// Dis: test_hi:
-// Dis:   lis 3, -1
-
-// Third TLS got entry starts at got[3] 0x100200d8.
-// c@got@tprel--> (0x100200d8. -  0x100280c0) = -0x7fe8 = 32744
-// Dis: test_ds:
-// Dis:   li 3, -32744
diff --git a/lld/test/ELF/ppc64-plt-stub.s b/lld/test/ELF/ppc64-plt-stub.s
index b2de161cebf85..44ea40dca8c9c 100644
--- a/lld/test/ELF/ppc64-plt-stub.s
+++ b/lld/test/ELF/ppc64-plt-stub.s
@@ -1,28 +1,37 @@
-// REQUIRES: ppc
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
-// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
-// RUN: ld.lld -shared %t2.o -o %t2.so
-// RUN: ld.lld %t.o %t2.so -o %t
-// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
-// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
-// RUN: ld.lld -shared %t2.o -o %t2.so
-// RUN: ld.lld %t.o %t2.so -o %t
-// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
-
-// CHECK:      Disassembly of section .text:
-// CHECK-EMPTY:
-// CHECK-NEXT: _start:
-// CHECK:      10010008: bl .+16
-
-// CHECK-LABEL: 0000000010010018 __plt_foo:
-// CHECK-NEXT:      std 2, 24(1)
-// CHECK-NEXT:      addis 12, 2, 0
-// CHECK-NEXT:      ld 12, 32560(12)
-// CHECK-NEXT:      mtctr 12
-// CHECK-NEXT:      bctr
+# REQUIRES: ppc
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
+# RUN: ld.lld -shared %t2.o -soname=t2.so -o %t2.so
+# RUN: ld.lld %t.o %t2.so -o %t
+# RUN: llvm-readelf -S -d %t | FileCheck --check-prefix=SEC %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
+# RUN: ld.lld -shared %t2.o -soname=t2.so -o %t2.so
+# RUN: ld.lld %t.o %t2.so -o %t
+# RUN: llvm-readelf -S -d %t | FileCheck --check-prefix=SEC %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+
+## DT_PLTGOT points to .plt
+# SEC: .plt NOBITS 0000000010030000 030000 000018
+# SEC: 0x0000000000000003 (PLTGOT) 0x10030000
+
+## .plt[0] holds the address of _dl_runtime_resolve.
+## .plt[1] holds the link map.
+## The JMP_SLOT relocation is stored at .plt[2]
+# RELOC: 0x10030010 R_PPC64_JMP_SLOT foo 0x0
+
+# CHECK:      _start:
+# CHECK:      10010008: bl .+16
+
+# CHECK-LABEL: 0000000010010018 __plt_foo:
+# CHECK-NEXT:      std 2, 24(1)
+# CHECK-NEXT:      addis 12, 2, 0
+# CHECK-NEXT:      ld 12, 32560(12)
+# CHECK-NEXT:      mtctr 12
+# CHECK-NEXT:      bctr
 
 
         .text
diff --git a/lld/test/ELF/ppc64-tls-gd-le-small.s b/lld/test/ELF/ppc64-tls-gd-le-small.s
deleted file mode 100644
index 4d80f0aed081d..0000000000000
--- a/lld/test/ELF/ppc64-tls-gd-le-small.s
+++ /dev/null
@@ -1,61 +0,0 @@
-# REQUIRES: ppc
-
-# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
-# RUN: llvm-objdump -d --no-show-raw-insn -r %t.o | FileCheck --check-prefix=CHECK-INPUT %s
-# RUN: ld.lld  --defsym __tls_get_addr=0x10001000 %t.o -o %t
-# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=CHECK-DIS %s
-# RUN: llvm-readelf -relocations %t | FileCheck --check-prefix=DYN-RELOCS %s
-
-# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
-# RUN: llvm-objdump -d --no-show-raw-insn -r %t.o | FileCheck --check-prefix=CHECK-INPUT %s
-# RUN: ld.lld  --defsym __tls_get_addr=0x10001000 %t.o -o %t
-# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=CHECK-DIS %s
-# RUN: llvm-readelf -relocations %t | FileCheck --check-prefix=DYN-RELOCS %s
-
-# Test checks the relaxation of a 'small' general-dynamic tls access into a
-# local-exec tls access.
-
-        .text
-        .abiversion 2
-
-        .global test
-        .p2align    4
-        .type test, @function
-
-test:
-.Lgep:
-    addis 2, 12, .TOC.-.Lgep@ha
-    addi  2, 2,  .TOC.-.Lgep@l
-    .localentry test, .-test
-    mflr 0
-    std 0, 16(1)
-    stdu 1, -32(1)
-    addi 3, 2, a@got@tlsgd
-    bl __tls_get_addr(a@tlsgd)
-    nop
-    lwz 3, 0(3)
-    addi 1, 1, 32
-    ld 0, 16(1)
-    mtlr 0
-    blr
-
-        .type a, @object
-        .section .tdata,"awT",@progbits
-        .global a
-        .p2align 2
-a:
-        .long 55
-        .size a, 4
-
-# CHECK-INPUT:       addi 3, 2, 0
-# CHECK-INPUT-NEXT:  R_PPC64_GOT_TLSGD16  a
-# CHECK-INPUT-NEXT:  bl .+0
-# CHECK-INPUT-NEXT:  R_PPC64_TLSGD        a
-# CHECK-INPUT-NEXT:  R_PPC64_REL24        __tls_get_addr
-
-# CHECK-DIS:      addis 3, 13, 0
-# CHECK-DIS-NEXT: nop
-# CHECK-DIS-NEXT: addi  3, 3, -28672
-# CHECK-DIS-NEXT: lwz 3, 0(3)
-
-# DYN-RELOCS: There are no relocations in this file
diff --git a/lld/test/ELF/ppc64-tls-gd-le.s b/lld/test/ELF/ppc64-tls-gd-le.s
deleted file mode 100644
index 78bffb2c41181..0000000000000
--- a/lld/test/ELF/ppc64-tls-gd-le.s
+++ /dev/null
@@ -1,79 +0,0 @@
-// REQUIRES: ppc
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
-// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
-// RUN: ld.lld  %t.o -o %t
-// RUN: llvm-objdump -d %t | FileCheck --check-prefix=Dis %s
-// RUN: llvm-readelf -r %t | FileCheck --check-prefix=OutputRelocs %s
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
-// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
-// RUN: ld.lld  %t.o -o %t
-// RUN: llvm-objdump -d %t | FileCheck --check-prefix=Dis %s
-// RUN: llvm-readelf -r %t | FileCheck --check-prefix=OutputRelocs %s
-
-	.text
-	.abiversion 2
-	.globl	_start                    # -- Begin function _start
-	.p2align	4
-	.type	_start,@function
-_start:                                   # @_start
-.Lfunc_begin0:
-.Lfunc_gep0:
-	addis 2, 12, .TOC.-.Lfunc_gep0@ha
-	addi 2, 2, .TOC.-.Lfunc_gep0@l
-.Lfunc_lep0:
-	.localentry	_start, .Lfunc_lep0-.Lfunc_gep0
-# %bb.0:                                # %entry
-	mflr 0
-	std 31, -8(1)
-	std 0, 16(1)
-	stdu 1, -64(1)
-	mr 31, 1
-	std 30, 48(31)                  # 8-byte Folded Spill
-	li 3, 0
-	stw 3, 44(31)
-	addis 3, 2, a@got@tlsgd@ha
-	addi 3, 3, a@got@tlsgd@l
-	bl __tls_get_addr(a@tlsgd)
-	nop
-	lwz 30, 0(3)
-	extsw 3, 30
-	ld 30, 48(31)                   # 8-byte Folded Reload
-	addi 1, 1, 64
-	ld 0, 16(1)
-	ld 31, -8(1)
-	mtlr 0
-	blr
-	.long	0
-	.quad	0
-.Lfunc_end0:
-	.size	_start, .Lfunc_end0-.Lfunc_begin0
-
-                                        # -- End function
-	.type	a,@object               # @a
-	.section	.tdata,"awT",@progbits
-	.globl	a
-	.p2align	2
-a:
-	.long	55                      # 0x37
-	.size	a, 4
-
-// Verify that the input has general-dynamic tls relocation types
-// InputRelocs:  Relocation section '.rela.text'
-// InputRelocs: R_PPC64_GOT_TLSGD16_HA  {{0+}}  a + 0
-// InputRelocs: R_PPC64_GOT_TLSGD16_LO  {{0+}}  a + 0
-// InputRelocs: R_PPC64_TLSGD           {{0+}}  a + 0
-
-// Verify that the general-dynamic sequence is  relaxed to local exec.
-// #ha(a@tprel) --> (0 - 0x7000 + 0x8000) >> 16 = 0
-// #lo(a@tprel)) --> (0 - 0x7000) &  0xFFFF =  -0x7000 = -28672
-// Dis: _start:
-// Dis: nop
-// Dis: addis 3, 13, 0
-// Dis: nop
-// Dis: addi 3, 3, -28672
-
-// Verify that no general-dynamic relocations exist for the dynamic linker.
-// OutputRelocs-NOT: R_PPC64_DTPMOD64
-// OutputRelocs-NOT: R_PPC64_DTPREL64
diff --git a/lld/test/ELF/ppc64-tls-gd.s b/lld/test/ELF/ppc64-tls-gd.s
new file mode 100644
index 0000000000000..5b9c9edbb0bfc
--- /dev/null
+++ b/lld/test/ELF/ppc64-tls-gd.s
@@ -0,0 +1,102 @@
+# REQUIRES: ppc
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
+# RUN: echo '.tbss; .globl b, c; b: .zero 4; c:' | llvm-mc -filetype=obj -triple=powerpc64le - -o %t1.o
+# RUN: ld.lld -shared -soname=t1.so %t1.o -o %t1.so
+
+# RUN: ld.lld -shared %t.o %t1.o -o %t.so
+# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=GD-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=GD %s
+
+# RUN: ld.lld %t.o %t1.o -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s
+
+# RUN: ld.lld %t.o %t1.so -o %t
+# RUN: llvm-readobj -r %t | FileCheck --check-prefix=IE-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s
+
+# GD-REL:      .rela.dyn {
+# GD-REL-NEXT:   0x200F0 R_PPC64_DTPMOD64 a 0x0
+# GD-REL-NEXT:   0x200F8 R_PPC64_DTPREL64 a 0x0
+# GD-REL-NEXT:   0x20100 R_PPC64_DTPMOD64 b 0x0
+# GD-REL-NEXT:   0x20108 R_PPC64_DTPREL64 b 0x0
+# GD-REL-NEXT:   0x20110 R_PPC64_DTPMOD64 c 0x0
+# GD-REL-NEXT:   0x20118 R_PPC64_DTPREL64 c 0x0
+# GD-REL-NEXT: }
+
+## &DTPMOD(a) - .TOC. = &.got[0] - (.got+0x8000) = -32768
+# GD:      addis 3, 2, 0
+# GD-NEXT: addi 3, 3, -32768
+# GD-NEXT: bl .+40
+# GD-NEXT: ld 2, 24(1)
+
+## &DTPMOD(b) - .TOC. = &.got[2] - (.got+0x8000) = -32752
+# GD-NEXT: addis 3, 2, 0
+# GD-NEXT: addi 3, 3, -32752
+# GD-NEXT: bl .+24
+# GD-NEXT: ld 2, 24(1)
+
+## &DTPMOD(c) - .TOC. = &.got[4] - (.got+0x8000) = -32736
+# GD-NEXT: li 3, -32736
+# GD-NEXT: bl .+12
+# GD-NEXT: ld 2, 24(1)
+
+# NOREL: no relocations
+
+## a@tprel = st_value(a)-0x7000 = -28664
+# LE:      nop
+# LE-NEXT: addis 3, 13, 0
+# LE-NEXT: nop
+# LE-NEXT: addi 3, 3, -28664
+## b@tprel = st_value(b)-0x7000 = -28660
+# LE:      nop
+# LE-NEXT: addis 3, 13, 0
+# LE-NEXT: nop
+# LE-NEXT: addi 3, 3, -28660
+## c@tprel = st_value(c)-0x7000 = -28656
+# LE-NEXT: addis 3, 13, 0
+# LE-NEXT: nop
+# LE-NEXT: addi 3, 3, -28656
+
+# IE-REL:      .rela.dyn {
+# IE-REL-NEXT:   0x100200C0 R_PPC64_TPREL64 b 0x0
+# IE-REL-NEXT:   0x100200C8 R_PPC64_TPREL64 c 0x0
+# IE-REL-NEXT: }
+
+## a is relaxed to use LE.
+## a@tprel = st_value(a)-0x7000 = -28664
+# IE:      nop
+# IE-NEXT: addis 3, 13, 0
+# IE-NEXT: nop
+# IE-NEXT: addi 3, 3, -28664
+## &DTPMOD(b) - .TOC. = &.got[0] - (.got+0x8000) = -32768
+# IE-NEXT: addis 3, 2, 0
+# IE-NEXT: ld 3, -32768(3)
+# IE-NEXT: nop
+# IE-NEXT: add 3, 3, 13
+## &DTPMOD(c) - .TOC. = &.got[1] - (.got+0x8000) = -32760
+## r0 is wrong. R_PPC64_GOT_TLSGD16 cannot be relaxed to IE but the behavior is
+## consistent with ld.bfd
+# IE-NEXT: ld 3, -32760(0)
+# IE-NEXT: nop
+# IE-NEXT: add 3, 3, 13
+
+addis 3, 2, a@got@tlsgd@ha
+addi 3, 3, a@got@tlsgd@l
+bl __tls_get_addr(a@tlsgd)
+nop
+
+addis 3, 2, b@got@tlsgd@ha
+addi 3, 3, b@got@tlsgd@l
+bl __tls_get_addr(b@tlsgd)
+nop
+
+addi 3, 0, c@got@tlsgd
+bl __tls_get_addr(c@tlsgd)
+nop
+
+.section .tbss
+.globl a
+.zero 8
+a:
+.zero 4
diff --git a/lld/test/ELF/ppc64-tls-ie-le.s b/lld/test/ELF/ppc64-tls-ie-le.s
deleted file mode 100644
index c9893107a1917..0000000000000
--- a/lld/test/ELF/ppc64-tls-ie-le.s
+++ /dev/null
@@ -1,140 +0,0 @@
-// REQUIRES: ppc
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
-// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-tls-ie-le.s -o %t2.o
-// RUN: ld.lld -dynamic-linker /lib64/ld64.so.2 %t.o %t2.o -o %t
-// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
-// RUN: llvm-readelf -r %t | FileCheck --check-prefix=OutputRelocs %s
-// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=Dis %s
-
-// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
-// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-tls-ie-le.s -o %t2.o
-// RUN: ld.lld -dynamic-linker /lib64/ld64.so.2 %t.o %t2.o -o %t
-// RUN: llvm-readelf -r %t.o | FileCheck --check-prefix=InputRelocs %s
-// RUN: llvm-readelf -r %t | FileCheck --check-prefix=OutputRelocs %s
-// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=Dis %s
-
-	.text
-	.abiversion 2
-test1:                                  # @test1
-	addis 3, 2, c@got@tprel@ha
-	ld 3, c@got@tprel@l(3)
-	lbzx 3, 3, c@tls
-	blr
-test2:                                  # @test2
-	addis 3, 2, s@got@tprel@ha
-	ld 3, s@got@tprel@l(3)
-	lhzx 3, 3, s@tls
-	blr
-test3:                                  # @test3
-	addis 3, 2, i@got@tprel@ha
-	ld 3, i@got@tprel@l(3)
-	lwzx 3, 3, i@tls
-	blr
-test4:                                  # @test4
-	addis 3, 2, l@got@tprel@ha
-	ld 3, l@got@tprel@l(3)
-	ldx 3, 3, l@tls
-	blr
-test5:                                  # @test5
-	addis 4, 2, c@got@tprel@ha
-	ld 4, c@got@tprel@l(4)
-	stbx 3, 4, c@tls
-	blr
-test6:                                  # @test6
-	addis 4, 2, s@got@tprel@ha
-	ld 4, s@got@tprel@l(4)
-	sthx 3, 4, s@tls
-	blr
-test7:                                  # @test7
-	addis 4, 2, i@got@tprel@ha
-	ld 4, i@got@tprel@l(4)
-	stwx 3, 4, i@tls
-	blr
-test8:                                  # @test8
-	addis 4, 2, l@got@tprel@ha
-	ld 4, l@got@tprel@l(4)
-	stdx 3, 4, l@tls
-	blr
-test9:                                  # @test9
-	addis 3, 2, i@got@tprel@ha
-	ld 3, i@got@tprel@l(3)
-	add 3, 3, i@tls
-	blr
-test_ds:                                  # @test_ds
-	ld 4, l@got@tprel(2)
-	stdx 3, 4, l@tls
-	blr
-
-
-// Verify that the input has initial-exec tls relocation types.
-// InputRelocs: Relocation section '.rela.text'
-// InputRelocs: R_PPC64_GOT_TPREL16_HA {{0+}} c + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_LO_DS {{0+}} c + 0
-// InputRelocs: R_PPC64_TLS {{0+}} c + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_HA {{0+}} s + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_LO_DS {{0+}} s + 0
-// InputRelocs: R_PPC64_TLS {{0+}} s + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_HA {{0+}} i + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_LO_DS {{0+}} i + 0
-// InputRelocs: R_PPC64_TLS {{0+}} i + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_HA {{0+}} l + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_LO_DS {{0+}} l + 0
-// InputRelocs: R_PPC64_TLS {{0+}} l + 0
-// InputRelocs: R_PPC64_GOT_TPREL16_DS {{0+}} l + 0
-// InputRelocs: R_PPC64_TLS {{0+}} l + 0
-
-// Verify that no initial-exec relocations exist for the dynamic linker.
-// OutputRelocs-NOT: R_PPC64_TPREL64  {{0+}}  c + 0
-// OutputRelocs-NPT: R_PPC64_TPREL64  {{0+}}  s + 0
-// OutputRelocs-NOT: R_PPC64_TPREL64  {{0+}}  i + 0
-// OutputRelocs-NOT: R_PPC64_TPREL64  {{0+}}  l + 0
-
-// Dis: test1:
-// Dis: nop
-// Dis: addis 3, 13, 0
-// Dis: lbz 3, -28672(3)
-
-// Dis: test2:
-// Dis: nop
-// Dis: addis 3, 13, 0
-// Dis: lhz 3, -28670(3)
-
-// Dis: test3:
-// Dis: nop
-// Dis: addis 3, 13, 0
-// Dis: lwz 3, -28668(3)
-
-// Dis: test4:
-// Dis: nop
-// Dis: addis 3, 13, 0
-// Dis: ld 3, -28664(3)
-
-// Dis: test5:
-// Dis: nop
-// Dis: addis 4, 13, 0
-// Dis: stb 3, -28672(4)
-
-// Dis: test6:
-// Dis: nop
-// Dis: addis 4, 13, 0
-// Dis: sth 3, -28670(4)
-
-// Dis: test7:
-// Dis: nop
-// Dis: addis 4, 13, 0
-// Dis: stw 3, -28668(4)
-
-// Dis: test8:
-// Dis: nop
-// Dis: addis 4, 13, 0
-// Dis: std 3, -28664(4)
-
-// Dis: test9:
-// Dis: nop
-// Dis: addis 3, 13, 0
-// Dis: addi 3, 3, -28668
-
-// Dis: test_ds:
-// Dis: addis 4, 13, 0
-// Dis: std 3, -28664(4)
diff --git a/lld/test/ELF/ppc64-tls-ie.s b/lld/test/ELF/ppc64-tls-ie.s
new file mode 100644
index 0000000000000..aa5509072c316
--- /dev/null
+++ b/lld/test/ELF/ppc64-tls-ie.s
@@ -0,0 +1,171 @@
+# REQUIRES: ppc
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
+# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=INPUT-REL %s
+## IE
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=IE-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=IE %s
+## IE -> LE
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o
+# RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=INPUT-REL %s
+## IE
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=IE-REL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.so | FileCheck --check-prefix=IE %s
+## IE -> LE
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-readelf -r %t | FileCheck --check-prefix=NOREL %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s
+
+# IE-REL:      .rela.dyn {
+# IE-REL-NEXT:   0x200B0 R_PPC64_TPREL64 c 0x0
+# IE-REL-NEXT:   0x200C0 R_PPC64_TPREL64 i 0x0
+# IE-REL-NEXT:   0x200C8 R_PPC64_TPREL64 l 0x0
+# IE-REL-NEXT:   0x200B8 R_PPC64_TPREL64 s 0x0
+# IE-REL-NEXT: }
+
+# INPUT-REL: R_PPC64_GOT_TPREL16_HA c 0x0
+# INPUT-REL: R_PPC64_GOT_TPREL16_LO_DS c 0x0
+# INPUT-REL: R_PPC64_TLS c 0x0
+## &.got[0] - .TOC. = -32768
+# IE-LABEL: test1:
+# IE-NEXT:  addis 3, 2, 0
+# IE-NEXT:  ld 3, -32768(3)
+# IE-NEXT:  lbzx 3, 3, 13
+# LE-LABEL: test1:
+# LE-NEXT:   nop
+# LE-NEXT:   addis 3, 13, 0
+# LE-NEXT:   lbz 3, -28672(3)
+test1:
+  addis 3, 2, c@got@tprel@ha
+  ld 3, c@got@tprel@l(3)
+  lbzx 3, 3, c@tls
+
+# INPUT-REL: R_PPC64_GOT_TPREL16_HA s 0x0
+# INPUT-REL: R_PPC64_GOT_TPREL16_LO_DS s 0x0
+# INPUT-REL: R_PPC64_TLS s 0x0
+## &.got[1] - .TOC. = -32760
+# IE-LABEL: test2:
+# IE-NEXT:  addis 3, 2, 0
+# IE-NEXT:  ld 3, -32760(3)
+# IE-NEXT:  lhzx 3, 3, 13
+# LE-LABEL: test2:
+# LE-NEXT:  nop
+# LE-NEXT:  addis 3, 13, 0
+# LE-NEXT:  lhz 3, -28670(3)
+test2:
+  addis 3, 2, s@got@tprel@ha
+  ld 3, s@got@tprel@l(3)
+  lhzx 3, 3, s@tls
+
+# INPUT-REL: R_PPC64_GOT_TPREL16_HA i 0x0
+# INPUT-REL: R_PPC64_GOT_TPREL16_LO_DS i 0x0
+# INPUT-REL: R_PPC64_TLS i 0x0
+## &.got[2] - .TOC. = -32752
+# IE-LABEL: test3:
+# IE-NEXT:  addis 3, 2, 0
+# IE-NEXT:  ld 3, -32752(3)
+# IE-NEXT:  lwzx 3, 3, 13
+# LE-LABEL: test3:
+# LE-NEXT:  nop
+# LE-NEXT:  addis 3, 13, 0
+# LE-NEXT:  lwz 3, -28668(3)
+test3:
+  addis 3, 2, i@got@tprel@ha
+  ld 3, i@got@tprel@l(3)
+  lwzx 3, 3, i@tls
+
+# INPUT-REL: R_PPC64_GOT_TPREL16_HA l 0x0
+# INPUT-REL: R_PPC64_GOT_TPREL16_LO_DS l 0x0
+# INPUT-REL: R_PPC64_TLS l 0x0
+## &.got[3] - .TOC. = -32744
+# IE-LABEL: test4:
+# IE-NEXT:  addis 3, 2, 0
+# IE-NEXT:  ld 3, -32744(3)
+# IE-NEXT:  ldx 3, 3, 13
+# LE-LABEL: test4:
+# LE-NEXT:  nop
+# LE-NEXT:  addis 3, 13, 0
+# LE-NEXT:  ld 3, -28664(3)
+test4:
+  addis 3, 2, l@got@tprel@ha
+  ld 3, l@got@tprel@l(3)
+  ldx 3, 3, l@tls
+
+# LE-LABEL: test5:
+# LE-NEXT:  nop
+# LE-NEXT:  addis 4, 13, 0
+# LE-NEXT: stb 3, -28672(4)
+test5:
+  addis 4, 2, c@got@tprel@ha
+  ld 4, c@got@tprel@l(4)
+  stbx 3, 4, c@tls
+
+
+# LE-LABEL: test6:
+# LE-NEXT:  nop
+# LE-NEXT:  addis 4, 13, 0
+# LE-NEXT: sth 3, -28670(4)
+test6:
+  addis 4, 2, s@got@tprel@ha
+  ld 4, s@got@tprel@l(4)
+  sthx 3, 4, s@tls
+
+
+# LE-LABEL: test7:
+# LE-NEXT:  nop
+# LE-NEXT:  addis 4, 13, 0
+# LE-NEXT: stw 3, -28668(4)
+test7:
+  addis 4, 2, i@got@tprel@ha
+  ld 4, i@got@tprel@l(4)
+  stwx 3, 4, i@tls
+
+# LE-LABEL: test8:
+# LE-NEXT:  nop
+# LE-NEXT:  addis 4, 13, 0
+# LE-NEXT: std 3, -28664(4)
+test8:
+  addis 4, 2, l@got@tprel@ha
+  ld 4, l@got@tprel@l(4)
+  stdx 3, 4, l@tls
+
+# LE-LABEL: test9:
+# LE-NEXT:  nop
+# LE-NEXT:  addis 3, 13, 0
+# LE-NEXT:  addi 3, 3, -28668
+test9:
+  addis 3, 2, i@got@tprel@ha
+  ld 3, i@got@tprel@l(3)
+  add 3, 3, i@tls
+
+# LE-LABEL: test_ds:
+# LE-NEXT:  addis 4, 13, 0
+# LE-NEXT: std 3, -28664(4)
+test_ds:
+  ld 4, l@got@tprel(2)
+  stdx 3, 4, l@tls
+
+# NOREL: There are no relocations in this file.
+
+.section .tdata,"awT",@progbits
+.globl c, s, i, l
+c:
+.byte 97
+
+.p2align 1
+s:
+.short 55
+
+.p2align 2
+i:
+.long 55
+
+.p2align 3
+l:
+.quad 55

From 0cadf7bb2e78b9276421a44c0e1ad3cb4520050b Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Thu, 18 Jul 2019 11:12:00 +0000
Subject: [PATCH 441/451] [lldb] Tablegenify thread commands and fix completion
 bug for thread step-*

Besides turning the options into the new tablegen format, this patch
also fixes a bug where a few commands offered source file completions
for the "count" and "end-linenumber" arguments (which both accepted only
integers). The reason is that the completion field of these options was
somehow initialized to '1' instead of our usual '0'.

llvm-svn: 366425
---
 lldb/source/Commands/CommandObjectThread.cpp | 53 ++++--------
 lldb/source/Commands/Options.td              | 86 ++++++++++++++++++++
 2 files changed, 100 insertions(+), 39 deletions(-)

diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp
index 3c6088d6e192e..ed7cf0a1a48d7 100644
--- a/lldb/source/Commands/CommandObjectThread.cpp
+++ b/lldb/source/Commands/CommandObjectThread.cpp
@@ -239,11 +239,8 @@ class CommandObjectIterateOverThreads : public CommandObjectParsed {
 // CommandObjectThreadBacktrace
 
 static constexpr OptionDefinition g_thread_backtrace_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_1, false, "count",    'c', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeCount,      "How many frames to display (-1 for all)" },
-  { LLDB_OPT_SET_1, false, "start",    's', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeFrameIndex, "Frame in which to start the backtrace" },
-  { LLDB_OPT_SET_1, false, "extended", 'e', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeBoolean,    "Show the extended backtrace, if available" }
-    // clang-format on
+#define LLDB_OPTIONS_thread_backtrace
+#include "CommandOptions.inc"
 };
 
 class CommandObjectThreadBacktrace : public CommandObjectIterateOverThreads {
@@ -407,16 +404,8 @@ static constexpr OptionEnumValues TriRunningModes() {
 }
 
 static constexpr OptionDefinition g_thread_step_scope_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_1, false, "step-in-avoids-no-debug",   'a', OptionParser::eRequiredArgument, nullptr, {},                0, eArgTypeBoolean,           "A boolean value that sets whether stepping into functions will step over functions with no debug information." },
-  { LLDB_OPT_SET_1, false, "step-out-avoids-no-debug",  'A', OptionParser::eRequiredArgument, nullptr, {},                0, eArgTypeBoolean,           "A boolean value, if true stepping out of functions will continue to step out till it hits a function with debug information." },
-  { LLDB_OPT_SET_1, false, "count",                     'c', OptionParser::eRequiredArgument, nullptr, {},                1, eArgTypeCount,             "How many times to perform the stepping operation - currently only supported for step-inst and next-inst." },
-  { LLDB_OPT_SET_1, false, "end-linenumber",            'e', OptionParser::eRequiredArgument, nullptr, {},                1, eArgTypeLineNum,           "The line at which to stop stepping - defaults to the next line and only supported for step-in and step-over.  You can also pass the string 'block' to step to the end of the current block.  This is particularly useful in conjunction with --step-target to step through a complex calling sequence." },
-  { LLDB_OPT_SET_1, false, "run-mode",                  'm', OptionParser::eRequiredArgument, nullptr, TriRunningModes(), 0, eArgTypeRunMode,           "Determine how to run other threads while stepping the current thread." },
-  { LLDB_OPT_SET_1, false, "step-over-regexp",          'r', OptionParser::eRequiredArgument, nullptr, {},                0, eArgTypeRegularExpression, "A regular expression that defines function names to not to stop at when stepping in." },
-  { LLDB_OPT_SET_1, false, "step-in-target",            't', OptionParser::eRequiredArgument, nullptr, {},                0, eArgTypeFunctionName,      "The name of the directly called function step in should stop at when stepping into." },
-  { LLDB_OPT_SET_2, false, "python-class",              'C', OptionParser::eRequiredArgument, nullptr, {},                0, eArgTypePythonClass,       "The name of the class that will manage this step - only supported for Scripted Step." }
-    // clang-format on
+#define LLDB_OPTIONS_thread_step_scope
+#include "CommandOptions.inc"
 };
 
 class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
@@ -995,12 +984,8 @@ static constexpr OptionEnumValues DuoRunningModes() {
 }
 
 static constexpr OptionDefinition g_thread_until_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_1, false, "frame",   'f', OptionParser::eRequiredArgument, nullptr, {},                0, eArgTypeFrameIndex,          "Frame index for until operation - defaults to 0" },
-  { LLDB_OPT_SET_1, false, "thread",  't', OptionParser::eRequiredArgument, nullptr, {},                0, eArgTypeThreadIndex,         "Thread index for the thread for until operation" },
-  { LLDB_OPT_SET_1, false, "run-mode",'m', OptionParser::eRequiredArgument, nullptr, DuoRunningModes(), 0, eArgTypeRunMode,             "Determine how to run other threads while stepping this one" },
-  { LLDB_OPT_SET_1, false, "address", 'a', OptionParser::eRequiredArgument, nullptr, {},                0, eArgTypeAddressOrExpression, "Run until we reach the specified address, or leave the function - can be specified multiple times." }
-    // clang-format on
+#define LLDB_OPTIONS_thread_until
+#include "CommandOptions.inc"
 };
 
 class CommandObjectThreadUntil : public CommandObjectParsed {
@@ -1419,10 +1404,8 @@ class CommandObjectThreadList : public CommandObjectParsed {
 // CommandObjectThreadInfo
 
 static constexpr OptionDefinition g_thread_info_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "json",      'j', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Display the thread info in JSON format." },
-  { LLDB_OPT_SET_ALL, false, "stop-info", 's', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Display the extended stop info in JSON format." }
-    // clang-format on
+#define LLDB_OPTIONS_thread_info
+#include "CommandOptions.inc"
 };
 
 class CommandObjectThreadInfo : public CommandObjectIterateOverThreads {
@@ -1555,9 +1538,8 @@ class CommandObjectThreadException : public CommandObjectIterateOverThreads {
 // CommandObjectThreadReturn
 
 static constexpr OptionDefinition g_thread_return_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_ALL, false, "from-expression", 'x', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Return from the innermost expression evaluation." }
-    // clang-format on
+#define LLDB_OPTIONS_thread_return
+#include "CommandOptions.inc"
 };
 
 class CommandObjectThreadReturn : public CommandObjectRaw {
@@ -1731,13 +1713,8 @@ class CommandObjectThreadReturn : public CommandObjectRaw {
 // CommandObjectThreadJump
 
 static constexpr OptionDefinition g_thread_jump_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_1,                                   false, "file",    'f', OptionParser::eRequiredArgument, nullptr, {}, CommandCompletions::eSourceFileCompletion, eArgTypeFilename,            "Specifies the source file to jump to." },
-  { LLDB_OPT_SET_1,                                   true,  "line",    'l', OptionParser::eRequiredArgument, nullptr, {}, 0,                                         eArgTypeLineNum,             "Specifies the line number to jump to." },
-  { LLDB_OPT_SET_2,                                   true,  "by",      'b', OptionParser::eRequiredArgument, nullptr, {}, 0,                                         eArgTypeOffset,              "Jumps by a relative line offset from the current line." },
-  { LLDB_OPT_SET_3,                                   true,  "address", 'a', OptionParser::eRequiredArgument, nullptr, {}, 0,                                         eArgTypeAddressOrExpression, "Jumps to a specific address." },
-  { LLDB_OPT_SET_1 | LLDB_OPT_SET_2 | LLDB_OPT_SET_3, false, "force",   'r', OptionParser::eNoArgument,       nullptr, {}, 0,                                         eArgTypeNone,                "Allows the PC to leave the current function." }
-    // clang-format on
+#define LLDB_OPTIONS_thread_jump
+#include "CommandOptions.inc"
 };
 
 class CommandObjectThreadJump : public CommandObjectParsed {
@@ -1879,10 +1856,8 @@ class CommandObjectThreadJump : public CommandObjectParsed {
 // CommandObjectThreadPlanList
 
 static constexpr OptionDefinition g_thread_plan_list_options[] = {
-    // clang-format off
-  { LLDB_OPT_SET_1, false, "verbose",  'v', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Display more information about the thread plans" },
-  { LLDB_OPT_SET_1, false, "internal", 'i', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Display internal as well as user thread plans" }
-    // clang-format on
+#define LLDB_OPTIONS_thread_plan_list
+#include "CommandOptions.inc"
 };
 
 class CommandObjectThreadPlanList : public CommandObjectIterateOverThreads {
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index da0c5ba9789bb..29f8a3d99aa8a 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -52,6 +52,92 @@ let Command = "breakpoint list" in {
     "provided, which prime new targets.">;
 }
 
+let Command = "thread backtrace" in {
+  def thread_backtrace_count : Option<"count", "c">, Group<1>, Arg<"Count">,
+  Desc<"How many frames to display (-1 for all)">;
+  def thread_backtrace_start : Option<"start", "s">, Group<1>,
+  Arg<"FrameIndex">, Desc<"Frame in which to start the backtrace">;
+  def thread_backtrace_extended : Option<"extended", "e">, Group<1>,
+  Arg<"Boolean">, Desc<"Show the extended backtrace, if available">;
+}
+
+let Command = "thread step scope" in {
+  def thread_step_scope_step_in_avoids_no_debug :
+    Option<"step-in-avoids-no-debug", "a">, Group<1>, Arg<"Boolean">,
+    Desc<"A boolean value that sets whether stepping into functions will step "
+    "over functions with no debug information.">;
+  def thread_step_scope_step_out_avoids_no_debug :
+    Option<"step-out-avoids-no-debug", "A">, Group<1>, Arg<"Boolean">,
+    Desc<"A boolean value, if true stepping out of functions will continue to"
+    " step out till it hits a function with debug information.">;
+  def thread_step_scope_count : Option<"count", "c">, Group<1>, Arg<"Count">,
+    Desc<"How many times to perform the stepping operation - currently only "
+    "supported for step-inst and next-inst.">;
+  def thread_step_scope_end_linenumber : Option<"end-linenumber", "e">,
+    Group<1>, Arg<"LineNum">, Desc<"The line at which to stop stepping - "
+      "defaults to the next line and only supported for step-in and step-over."
+      "  You can also pass the string 'block' to step to the end of the current"
+      " block.  This is particularly useful in conjunction with --step-target"
+      " to step through a complex calling sequence.">;
+  def thread_step_scope_run_mode : Option<"run-mode", "m">, Group<1>,
+    EnumArg<"RunMode", "TriRunningModes()">, Desc<"Determine how to run other "
+    "threads while stepping the current thread.">;
+  def thread_step_scope_step_over_regexp : Option<"step-over-regexp", "r">,
+    Group<1>, Arg<"RegularExpression">, Desc<"A regular expression that defines"
+    " function names to not to stop at when stepping in.">;
+  def thread_step_scope_step_in_target : Option<"step-in-target", "t">,
+    Group<1>, Arg<"FunctionName">, Desc<"The name of the directly called "
+    "function step in should stop at when stepping into.">;
+  def thread_step_scope_python_class : Option<"python-class", "C">, Group<2>,
+    Arg<"PythonClass">, Desc<"The name of the class that will manage this step "
+    "- only supported for Scripted Step.">;
+}
+
+let Command = "thread until" in {
+  def thread_until_frame : Option<"frame", "f">, Group<1>, Arg<"FrameIndex">,
+    Desc<"Frame index for until operation - defaults to 0">;
+  def thread_until_thread : Option<"thread", "t">, Group<1>, Arg<"ThreadIndex">,
+    Desc<"Thread index for the thread for until operation">;
+  def thread_until_run_mode : Option<"run-mode", "m">, Group<1>,
+    EnumArg<"RunMode", "DuoRunningModes()">, Desc<"Determine how to run other"
+    " threads while stepping this one">;
+  def thread_until_address : Option<"address", "a">, Group<1>,
+    Arg<"AddressOrExpression">, Desc<"Run until we reach the specified address,"
+    " or leave the function - can be specified multiple times.">;
+}
+
+let Command = "thread info" in {
+  def thread_info_json : Option<"json", "j">, Desc<"Display the thread info in"
+    " JSON format.">;
+  def thread_info_stop_info : Option<"stop-info", "s">, Desc<"Display the "
+    "extended stop info in JSON format.">;
+}
+
+let Command = "thread return" in {
+  def thread_return_from_expression : Option<"from-expression", "x">,
+    Desc<"Return from the innermost expression evaluation.">;
+}
+
+let Command = "thread jump" in {
+  def thread_jump_file : Option<"file", "f">, Group<1>, Arg<"Filename">,
+    Completion<"SourceFile">, Desc<"Specifies the source file to jump to.">;
+  def thread_jump_line : Option<"line", "l">, Group<1>, Arg<"LineNum">,
+    Required, Desc<"Specifies the line number to jump to.">;
+  def thread_jump_by : Option<"by", "b">, Group<2>, Arg<"Offset">, Required,
+    Desc<"Jumps by a relative line offset from the current line.">;
+  def thread_jump_address : Option<"address", "a">, Group<3>,
+    Arg<"AddressOrExpression">, Required, Desc<"Jumps to a specific address.">;
+  def thread_jump_force : Option<"force", "r">, Groups<[1,2,3]>,
+    Desc<"Allows the PC to leave the current function.">;
+}
+
+let Command = "thread plan list" in {
+  def thread_plan_list_verbose : Option<"verbose", "v">, Group<1>,
+    Desc<"Display more information about the thread plans">;
+  def thread_plan_list_internal : Option<"internal", "i">, Group<1>,
+    Desc<"Display internal as well as user thread plans">;
+}
+
 let Command = "type summary add" in {
   def type_summary_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
   def type_summary_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;

From c89a3d78f43d81b9cff7b9248772ddf14d21b749 Mon Sep 17 00:00:00 2001
From: Raphael Isemann <teemperor@gmail.com>
Date: Thu, 18 Jul 2019 11:43:45 +0000
Subject: [PATCH 442/451] [lldb][NFC] Format 'type' commands in Options.td

llvm-svn: 366426
---
 lldb/source/Commands/Options.td | 144 ++++++++++++++++++++++----------
 1 file changed, 98 insertions(+), 46 deletions(-)

diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 29f8a3d99aa8a..9cfbcd2d4ebfe 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -139,82 +139,134 @@ let Command = "thread plan list" in {
 }
 
 let Command = "type summary add" in {
-  def type_summary_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
-  def type_summary_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;
-  def type_summary_add_no_value : Option<"no-value", "v">, Desc<"Don't show the value, just show the summary, for this type.">;
-  def type_summary_add_skip_pointers : Option<"skip-pointers", "p">, Desc<"Don't use this format for pointers-to-type objects.">;
-  def type_summary_add_skip_references : Option<"skip-references", "r">, Desc<"Don't use this format for references-to-type objects.">;
-  def type_summary_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">;
-  def type_summary_add_inline_children : Option<"inline-children", "c">, Group<1>, Required, Desc<"If true, inline all child values into summary string.">;
-  def type_summary_add_omit_names : Option<"omit-names", "O">, Group<1>, Desc<"If true, omit value names in the summary display.">;
-  def type_summary_add_summary_string : Option<"summary-string", "s">, Group<2>, Arg<"SummaryString">, Required, Desc<"Summary string used to display text and object contents.">;
-  def type_summary_add_python_script : Option<"python-script", "o">, Group<3>, Arg<"PythonScript">, Desc<"Give a one-liner Python script as part of the command.">;
-  def type_summary_add_python_function : Option<"python-function", "F">, Group<3>, Arg<"PythonFunction">, Desc<"Give the name of a Python function to use for this type.">;
-  def type_summary_add_input_python : Option<"input-python", "P">, Group<3>, Desc<"Input Python code to use for this type manually.">;
-  def type_summary_add_expand : Option<"expand", "e">, Groups<[2,3]>, Desc<"Expand aggregate data types to show children on separate lines.">;
-  def type_summary_add_hide_empty : Option<"hide-empty", "h">, Groups<[2,3]>, Desc<"Do not expand aggregate data types with no children.">;
-  def type_summary_add_name : Option<"name", "n">, Groups<[2,3]>, Arg<"Name">, Desc<"A name for this summary string.">;
+  def type_summary_add_category : Option<"category", "w">, Arg<"Name">,
+    Desc<"Add this to the given category instead of the default one.">;
+  def type_summary_add_cascade : Option<"cascade", "C">, Arg<"Boolean">,
+    Desc<"If true, cascade through typedef chains.">;
+  def type_summary_add_no_value : Option<"no-value", "v">,
+    Desc<"Don't show the value, just show the summary, for this type.">;
+  def type_summary_add_skip_pointers : Option<"skip-pointers", "p">,
+    Desc<"Don't use this format for pointers-to-type objects.">;
+  def type_summary_add_skip_references : Option<"skip-references", "r">,
+    Desc<"Don't use this format for references-to-type objects.">;
+  def type_summary_add_regex : Option<"regex", "x">,
+    Desc<"Type names are actually regular expressions.">;
+  def type_summary_add_inline_children : Option<"inline-children", "c">,
+    Group<1>, Required,
+    Desc<"If true, inline all child values into summary string.">;
+  def type_summary_add_omit_names : Option<"omit-names", "O">, Group<1>,
+    Desc<"If true, omit value names in the summary display.">;
+  def type_summary_add_summary_string : Option<"summary-string", "s">, Group<2>,
+    Arg<"SummaryString">, Required,
+    Desc<"Summary string used to display text and object contents.">;
+  def type_summary_add_python_script : Option<"python-script", "o">, Group<3>,
+    Arg<"PythonScript">,
+    Desc<"Give a one-liner Python script as part of the command.">;
+  def type_summary_add_python_function : Option<"python-function", "F">,
+    Group<3>, Arg<"PythonFunction">,
+    Desc<"Give the name of a Python function to use for this type.">;
+  def type_summary_add_input_python : Option<"input-python", "P">, Group<3>,
+    Desc<"Input Python code to use for this type manually.">;
+  def type_summary_add_expand : Option<"expand", "e">, Groups<[2,3]>,
+    Desc<"Expand aggregate data types to show children on separate lines.">;
+  def type_summary_add_hide_empty : Option<"hide-empty", "h">, Groups<[2,3]>,
+    Desc<"Do not expand aggregate data types with no children.">;
+  def type_summary_add_name : Option<"name", "n">, Groups<[2,3]>, Arg<"Name">,
+    Desc<"A name for this summary string.">;
 }
 
 let Command = "type synth add" in {
-  def type_synth_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;
-  def type_synth_add_skip_pointers : Option<"skip-pointers", "p">, Desc<"Don't use this format for pointers-to-type objects.">;
-  def type_synth_add_skip_references : Option<"skip-references", "r">, Desc<"Don't use this format for references-to-type objects.">;
-  def type_synth_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
-  def type_synth_add_python_class : Option<"python-class", "l">, Group<2>, Arg<"PythonClass">, Desc<"Use this Python class to produce synthetic children.">;
-  def type_synth_add_input_python : Option<"input-python", "P">, Group<3>, Desc<"Type Python code to generate a class that provides synthetic children.">;
-  def type_synth_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">;
+  def type_synth_add_cascade : Option<"cascade", "C">, Arg<"Boolean">,
+    Desc<"If true, cascade through typedef chains.">;
+  def type_synth_add_skip_pointers : Option<"skip-pointers", "p">,
+    Desc<"Don't use this format for pointers-to-type objects.">;
+  def type_synth_add_skip_references : Option<"skip-references", "r">,
+    Desc<"Don't use this format for references-to-type objects.">;
+  def type_synth_add_category : Option<"category", "w">, Arg<"Name">,
+    Desc<"Add this to the given category instead of the default one.">;
+  def type_synth_add_python_class : Option<"python-class", "l">, Group<2>,
+    Arg<"PythonClass">,
+    Desc<"Use this Python class to produce synthetic children.">;
+  def type_synth_add_input_python : Option<"input-python", "P">, Group<3>,
+    Desc<"Type Python code to generate a class that provides synthetic "
+    "children.">;
+  def type_synth_add_regex : Option<"regex", "x">,
+    Desc<"Type names are actually regular expressions.">;
 }
 
 let Command = "type format add" in {
-  def type_format_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
-  def type_format_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;
-  def type_format_add_skip_pointers : Option<"skip-pointers", "p">, Desc<"Don't use this format for pointers-to-type objects.">;
-  def type_format_add_skip_references : Option<"skip-references", "r">, Desc<"Don't use this format for references-to-type objects.">;
-  def type_format_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">;
-  def type_format_add_type : Option<"type", "t">, Group<2>, Arg<"Name">, Desc<"Format variables as if they were of this type.">;
+  def type_format_add_category : Option<"category", "w">, Arg<"Name">,
+    Desc<"Add this to the given category instead of the default one.">;
+  def type_format_add_cascade : Option<"cascade", "C">, Arg<"Boolean">,
+    Desc<"If true, cascade through typedef chains.">;
+  def type_format_add_skip_pointers : Option<"skip-pointers", "p">,
+    Desc<"Don't use this format for pointers-to-type objects.">;
+  def type_format_add_skip_references : Option<"skip-references", "r">,
+    Desc<"Don't use this format for references-to-type objects.">;
+  def type_format_add_regex : Option<"regex", "x">,
+    Desc<"Type names are actually regular expressions.">;
+  def type_format_add_type : Option<"type", "t">, Group<2>, Arg<"Name">,
+    Desc<"Format variables as if they were of this type.">;
 }
 
 let Command = "type formatter delete" in {
-  def type_formatter_delete_all : Option<"all", "a">, Group<1>, Desc<"Delete from every category.">;
-  def type_formatter_delete_category : Option<"category", "w">, Group<2>, Arg<"Name">, Desc<"Delete from given category.">;
-  def type_formatter_delete_language : Option<"language", "l">, Group<3>, Arg<"Language">, Desc<"Delete from given language's category.">;
+  def type_formatter_delete_all : Option<"all", "a">, Group<1>,
+    Desc<"Delete from every category.">;
+  def type_formatter_delete_category : Option<"category", "w">, Group<2>,
+    Arg<"Name">, Desc<"Delete from given category.">;
+  def type_formatter_delete_language : Option<"language", "l">, Group<3>,
+    Arg<"Language">, Desc<"Delete from given language's category.">;
 }
 
 let Command = "type formatter clear" in {
-  def type_formatter_clear_all : Option<"all", "a">, Desc<"Clear every category.">;
+  def type_formatter_clear_all : Option<"all", "a">,
+    Desc<"Clear every category.">;
 }
 
 let Command = "type formatter list" in {
-  def type_formatter_list_category_regex : Option<"category-regex", "w">, Group<1>, Arg<"Name">, Desc<"Only show categories matching this filter.">;
-  def type_formatter_list_language : Option<"language", "l">, Group<2>, Arg<"Language">, Desc<"Only show the category for a specific language.">;
+  def type_formatter_list_category_regex : Option<"category-regex", "w">,
+    Group<1>, Arg<"Name">, Desc<"Only show categories matching this filter.">;
+  def type_formatter_list_language : Option<"language", "l">, Group<2>,
+    Arg<"Language">, Desc<"Only show the category for a specific language.">;
 }
 
 let Command = "type category define" in {
-  def type_category_define_enabled : Option<"enabled", "e">, Desc<"If specified, this category will be created enabled.">;
-  def type_category_define_language : Option<"language", "l">, Arg<"Language">, Desc<"Specify the language that this category is supported for.">;
+  def type_category_define_enabled : Option<"enabled", "e">,
+    Desc<"If specified, this category will be created enabled.">;
+  def type_category_define_language : Option<"language", "l">, Arg<"Language">,
+    Desc<"Specify the language that this category is supported for.">;
 }
 
 let Command = "type category enable" in {
-  def type_category_enable_language : Option<"language", "l">, Arg<"Language">, Desc<"Enable the category for this language.">;
+  def type_category_enable_language : Option<"language", "l">, Arg<"Language">,
+    Desc<"Enable the category for this language.">;
 }
 
 let Command = "type category disable" in {
-  def type_category_disable_language : Option<"language", "l">, Arg<"Language">, Desc<"Enable the category for this language.">;
+  def type_category_disable_language : Option<"language", "l">, Arg<"Language">,
+    Desc<"Disable the category for this language.">;
 }
 
 let Command = "type filter add" in {
-  def type_filter_add_cascade : Option<"cascade", "C">, Arg<"Boolean">, Desc<"If true, cascade through typedef chains.">;
-  def type_filter_add_skip_pointers : Option<"skip-pointers", "p">, Desc<"Don't use this format for pointers-to-type objects.">;
-  def type_filter_add_skip_references : Option<"skip-references", "r">, Desc<"Don't use this format for references-to-type objects.">;
-  def type_filter_add_category : Option<"category", "w">, Arg<"Name">, Desc<"Add this to the given category instead of the default one.">;
-  def type_filter_add_child : Option<"child", "c">, Arg<"ExpressionPath">, Desc<"Include this expression path in the synthetic view.">;
-  def type_filter_add_regex : Option<"regex", "x">, Desc<"Type names are actually regular expressions.">;
+  def type_filter_add_cascade : Option<"cascade", "C">, Arg<"Boolean">,
+    Desc<"If true, cascade through typedef chains.">;
+  def type_filter_add_skip_pointers : Option<"skip-pointers", "p">,
+    Desc<"Don't use this format for pointers-to-type objects.">;
+  def type_filter_add_skip_references : Option<"skip-references", "r">,
+    Desc<"Don't use this format for references-to-type objects.">;
+  def type_filter_add_category : Option<"category", "w">, Arg<"Name">,
+    Desc<"Add this to the given category instead of the default one.">;
+  def type_filter_add_child : Option<"child", "c">, Arg<"ExpressionPath">,
+    Desc<"Include this expression path in the synthetic view.">;
+  def type_filter_add_regex : Option<"regex", "x">,
+    Desc<"Type names are actually regular expressions.">;
 }
 
 let Command = "type lookup" in {
-  def type_lookup_show_help : Option<"show-help", "h">, Desc<"Display available help for types">;
-  def type_lookup_language : Option<"language", "l">, Arg<"Language">, Desc<"Which language's types should the search scope be">;
+  def type_lookup_show_help : Option<"show-help", "h">,
+    Desc<"Display available help for types">;
+  def type_lookup_language : Option<"language", "l">, Arg<"Language">,
+    Desc<"Which language's types should the search scope be">;
 }
 
 let Command = "watchpoint list" in {

From 2cf681a11aea459b50d712abc7136f7129e4d57f Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Thu, 18 Jul 2019 11:53:54 +0000
Subject: [PATCH 443/451] Creating release_90 branch off revision 366426

llvm-svn: 366428

From c02ac091705f0be00692cb5772c53a79f166a4c3 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Fri, 19 Jul 2019 09:22:23 +0000
Subject: [PATCH 444/451] Drop svn version suffix.

llvm-svn: 366547
---
 libcxx/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 5df3f257294e3..ddcd478b2139d 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -27,7 +27,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL
   project(libcxx CXX C)
 
   set(PACKAGE_NAME libcxx)
-  set(PACKAGE_VERSION 9.0.0svn)
+  set(PACKAGE_VERSION 9.0.0)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 

From 135370e01e84166b1c2ecb712487774df8b67957 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Fri, 19 Jul 2019 09:22:41 +0000
Subject: [PATCH 445/451] Drop svn version suffix.

llvm-svn: 366548
---
 libcxxabi/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index 8ca169065f48e..ce69fff1370a0 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -21,7 +21,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXXABI_STANDALONE_B
   project(libcxxabi CXX C)
 
   set(PACKAGE_NAME libcxxabi)
-  set(PACKAGE_VERSION 9.0.0svn)
+  set(PACKAGE_VERSION 9.0.0)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 

From a4f7f7245008d48aeeb4bf9ef317b1c8ea09e40b Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Fri, 19 Jul 2019 09:22:57 +0000
Subject: [PATCH 446/451] Drop svn version suffix.

llvm-svn: 366549
---
 libunwind/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index b51922a48fe28..836b286523ef1 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -83,7 +83,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_B
   endif()
 
   set(PACKAGE_NAME libunwind)
-  set(PACKAGE_VERSION 9.0.0svn)
+  set(PACKAGE_VERSION 9.0.0)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 

From 24c2e53e770f5fe98d853ff04f035e3696b2cf60 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Fri, 19 Jul 2019 09:23:25 +0000
Subject: [PATCH 447/451] Drop svn version suffix.

llvm-svn: 366550
---
 llvm/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index b8eb19848bc58..8d71dee98f486 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -25,7 +25,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
   set(LLVM_VERSION_PATCH 0)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
-  set(LLVM_VERSION_SUFFIX svn)
+  set(LLVM_VERSION_SUFFIX "")
 endif()
 
 if (NOT PACKAGE_VERSION)

From eb482e5e341fb81b3588a203a27d00b8089c0e05 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Fri, 19 Jul 2019 09:36:22 +0000
Subject: [PATCH 448/451] Merging r366429:
 ------------------------------------------------------------------------
 r366429 | ibiryukov | 2019-07-18 13:55:33 +0200 (Thu, 18 Jul 2019) | 5 lines

Revert r366422: [OpenCL] Improve destructor support in C++ for OpenCL

Reason: this commit causes crashes in the clang compiler when building
LLVM Support with libc++, see https://bugs.llvm.org/show_bug.cgi?id=42665
for details.
------------------------------------------------------------------------

llvm-svn: 366552
---
 clang/include/clang/AST/DeclCXX.h             | 14 +----
 clang/lib/AST/DeclCXX.cpp                     | 25 +-------
 clang/lib/CodeGen/CGCXXABI.h                  | 14 ++---
 clang/lib/CodeGen/CGCall.cpp                  |  2 +-
 clang/lib/CodeGen/CGClass.cpp                 | 40 +++++--------
 clang/lib/CodeGen/CGDecl.cpp                  | 21 +++----
 clang/lib/CodeGen/CGExprCXX.cpp               | 31 +++-------
 clang/lib/CodeGen/CodeGenFunction.h           | 13 ++--
 clang/lib/CodeGen/ItaniumCXXABI.cpp           | 31 ++++------
 clang/lib/CodeGen/MicrosoftCXXABI.cpp         | 29 ++++-----
 clang/lib/Sema/SemaDeclCXX.cpp                | 50 ++++++++--------
 clang/lib/Sema/SemaOverload.cpp               |  4 +-
 clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl | 14 +++++
 .../CodeGenOpenCLCXX/addrspace-with-class.cl  | 59 -------------------
 14 files changed, 113 insertions(+), 234 deletions(-)
 create mode 100644 clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl
 delete mode 100644 clang/test/CodeGenOpenCLCXX/addrspace-with-class.cl

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 7add83f896244..cbf4f1397eb1f 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -2232,20 +2232,20 @@ class CXXMethodDecl : public FunctionDecl {
 
   overridden_method_range overridden_methods() const;
 
-  /// Return the parent of this method declaration, which
+  /// Returns the parent of this method declaration, which
   /// is the class in which this method is defined.
   const CXXRecordDecl *getParent() const {
     return cast<CXXRecordDecl>(FunctionDecl::getParent());
   }
 
-  /// Return the parent of this method declaration, which
+  /// Returns the parent of this method declaration, which
   /// is the class in which this method is defined.
   CXXRecordDecl *getParent() {
     return const_cast<CXXRecordDecl *>(
              cast<CXXRecordDecl>(FunctionDecl::getParent()));
   }
 
-  /// Return the type of the \c this pointer.
+  /// Returns the type of the \c this pointer.
   ///
   /// Should only be called for instance (i.e., non-static) methods. Note
   /// that for the call operator of a lambda closure type, this returns the
@@ -2253,17 +2253,9 @@ class CXXMethodDecl : public FunctionDecl {
   /// 'this' type.
   QualType getThisType() const;
 
-  /// Return the type of the object pointed by \c this.
-  ///
-  /// See getThisType() for usage restriction.
-  QualType getThisObjectType() const;
-
   static QualType getThisType(const FunctionProtoType *FPT,
                               const CXXRecordDecl *Decl);
 
-  static QualType getThisObjectType(const FunctionProtoType *FPT,
-                                    const CXXRecordDecl *Decl);
-
   Qualifiers getMethodQualifiers() const {
     return getType()->getAs<FunctionProtoType>()->getMethodQuals();
   }
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 59710a55498f2..857ac19e6b14c 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -2253,23 +2253,12 @@ CXXMethodDecl::overridden_methods() const {
   return getASTContext().overridden_methods(this);
 }
 
-static QualType getThisObjectType(ASTContext &C, const FunctionProtoType *FPT,
-                                  const CXXRecordDecl *Decl) {
-  QualType ClassTy = C.getTypeDeclType(Decl);
-  return C.getQualifiedType(ClassTy, FPT->getMethodQuals());
-}
-
 QualType CXXMethodDecl::getThisType(const FunctionProtoType *FPT,
                                     const CXXRecordDecl *Decl) {
   ASTContext &C = Decl->getASTContext();
-  QualType ObjectTy = ::getThisObjectType(C, FPT, Decl);
-  return C.getPointerType(ObjectTy);
-}
-
-QualType CXXMethodDecl::getThisObjectType(const FunctionProtoType *FPT,
-                                          const CXXRecordDecl *Decl) {
-  ASTContext &C = Decl->getASTContext();
-  return ::getThisObjectType(C, FPT, Decl);
+  QualType ClassTy = C.getTypeDeclType(Decl);
+  ClassTy = C.getQualifiedType(ClassTy, FPT->getMethodQuals());
+  return C.getPointerType(ClassTy);
 }
 
 QualType CXXMethodDecl::getThisType() const {
@@ -2284,14 +2273,6 @@ QualType CXXMethodDecl::getThisType() const {
                                     getParent());
 }
 
-QualType CXXMethodDecl::getThisObjectType() const {
-  // Ditto getThisType.
-  assert(isInstance() && "No 'this' for static methods!");
-
-  return CXXMethodDecl::getThisObjectType(getType()->getAs<FunctionProtoType>(),
-                                          getParent());
-}
-
 bool CXXMethodDecl::hasInlineBody() const {
   // If this function is a template instantiation, look at the template from
   // which it was instantiated.
diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h
index 3a9c3b3474394..511bcd00d4277 100644
--- a/clang/lib/CodeGen/CGCXXABI.h
+++ b/clang/lib/CodeGen/CGCXXABI.h
@@ -378,7 +378,7 @@ class CGCXXABI {
   virtual void EmitDestructorCall(CodeGenFunction &CGF,
                                   const CXXDestructorDecl *DD, CXXDtorType Type,
                                   bool ForVirtualBase, bool Delegating,
-                                  Address This, QualType ThisTy) = 0;
+                                  Address This) = 0;
 
   /// Emits the VTable definitions required for the given record type.
   virtual void emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -421,15 +421,11 @@ class CGCXXABI {
                                              llvm::Type *Ty,
                                              SourceLocation Loc) = 0;
 
-  using DeleteOrMemberCallExpr =
-      llvm::PointerUnion<const CXXDeleteExpr *, const CXXMemberCallExpr *>;
-
   /// Emit the ABI-specific virtual destructor call.
-  virtual llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
-                                                 const CXXDestructorDecl *Dtor,
-                                                 CXXDtorType DtorType,
-                                                 Address This,
-                                                 DeleteOrMemberCallExpr E) = 0;
+  virtual llvm::Value *
+  EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor,
+                            CXXDtorType DtorType, Address This,
+                            const CXXMemberCallExpr *CE) = 0;
 
   virtual void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF,
                                                 GlobalDecl GD,
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index cf8024550eeec..5f1fb10074829 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3502,7 +3502,7 @@ struct DestroyUnpassedArg final : EHScopeStack::Cleanup {
       const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
       assert(!Dtor->isTrivial());
       CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false,
-                                /*Delegating=*/false, Addr, Ty);
+                                /*Delegating=*/false, Addr);
     } else {
       CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty));
     }
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index c8bb63c5c4b1f..9a9dd88810ed3 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -491,15 +491,12 @@ namespace {
         cast<CXXMethodDecl>(CGF.CurCodeDecl)->getParent();
 
       const CXXDestructorDecl *D = BaseClass->getDestructor();
-      // We are already inside a destructor, so presumably the object being
-      // destroyed should have the expected type.
-      QualType ThisTy = D->getThisObjectType();
       Address Addr =
         CGF.GetAddressOfDirectBaseInCompleteClass(CGF.LoadCXXThisAddress(),
                                                   DerivedClass, BaseClass,
                                                   BaseIsVirtual);
       CGF.EmitCXXDestructorCall(D, Dtor_Base, BaseIsVirtual,
-                                /*Delegating=*/false, Addr, ThisTy);
+                                /*Delegating=*/false, Addr);
     }
   };
 
@@ -1443,11 +1440,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
   if (DtorType == Dtor_Deleting) {
     RunCleanupsScope DtorEpilogue(*this);
     EnterDtorCleanups(Dtor, Dtor_Deleting);
-    if (HaveInsertPoint()) {
-      QualType ThisTy = Dtor->getThisObjectType();
+    if (HaveInsertPoint())
       EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false,
-                            /*Delegating=*/false, LoadCXXThisAddress(), ThisTy);
-    }
+                            /*Delegating=*/false, LoadCXXThisAddress());
     return;
   }
 
@@ -1478,9 +1473,8 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) {
     EnterDtorCleanups(Dtor, Dtor_Complete);
 
     if (!isTryBody) {
-      QualType ThisTy = Dtor->getThisObjectType();
       EmitCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false,
-                            /*Delegating=*/false, LoadCXXThisAddress(), ThisTy);
+                            /*Delegating=*/false, LoadCXXThisAddress());
       break;
     }
 
@@ -2019,7 +2013,7 @@ void CodeGenFunction::destroyCXXObject(CodeGenFunction &CGF,
   const CXXDestructorDecl *dtor = record->getDestructor();
   assert(!dtor->isTrivial());
   CGF.EmitCXXDestructorCall(dtor, Dtor_Complete, /*for vbase*/ false,
-                            /*Delegating=*/false, addr, type);
+                            /*Delegating=*/false, addr);
 }
 
 void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
@@ -2369,11 +2363,8 @@ namespace {
       : Dtor(D), Addr(Addr), Type(Type) {}
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
-      // We are calling the destructor from within the constructor.
-      // Therefore, "this" should have the expected type.
-      QualType ThisTy = Dtor->getThisObjectType();
       CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false,
-                                /*Delegating=*/true, Addr, ThisTy);
+                                /*Delegating=*/true, Addr);
     }
   };
 } // end anonymous namespace
@@ -2411,32 +2402,31 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor
 void CodeGenFunction::EmitCXXDestructorCall(const CXXDestructorDecl *DD,
                                             CXXDtorType Type,
                                             bool ForVirtualBase,
-                                            bool Delegating, Address This,
-                                            QualType ThisTy) {
+                                            bool Delegating,
+                                            Address This) {
   CGM.getCXXABI().EmitDestructorCall(*this, DD, Type, ForVirtualBase,
-                                     Delegating, This, ThisTy);
+                                     Delegating, This);
 }
 
 namespace {
   struct CallLocalDtor final : EHScopeStack::Cleanup {
     const CXXDestructorDecl *Dtor;
     Address Addr;
-    QualType Ty;
 
-    CallLocalDtor(const CXXDestructorDecl *D, Address Addr, QualType Ty)
-        : Dtor(D), Addr(Addr), Ty(Ty) {}
+    CallLocalDtor(const CXXDestructorDecl *D, Address Addr)
+      : Dtor(D), Addr(Addr) {}
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
       CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
                                 /*ForVirtualBase=*/false,
-                                /*Delegating=*/false, Addr, Ty);
+                                /*Delegating=*/false, Addr);
     }
   };
 } // end anonymous namespace
 
 void CodeGenFunction::PushDestructorCleanup(const CXXDestructorDecl *D,
-                                            QualType T, Address Addr) {
-  EHStack.pushCleanup<CallLocalDtor>(NormalAndEHCleanup, D, Addr, T);
+                                            Address Addr) {
+  EHStack.pushCleanup<CallLocalDtor>(NormalAndEHCleanup, D, Addr);
 }
 
 void CodeGenFunction::PushDestructorCleanup(QualType T, Address Addr) {
@@ -2446,7 +2436,7 @@ void CodeGenFunction::PushDestructorCleanup(QualType T, Address Addr) {
 
   const CXXDestructorDecl *D = ClassDecl->getDestructor();
   assert(D && D->isUsed() && "destructor not marked as used!");
-  PushDestructorCleanup(D, T, Addr);
+  PushDestructorCleanup(D, Addr);
 }
 
 void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) {
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 6ad43cefc4d29..19a9e75cc5ac9 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -480,12 +480,11 @@ namespace {
 
   template <class Derived>
   struct DestroyNRVOVariable : EHScopeStack::Cleanup {
-    DestroyNRVOVariable(Address addr, QualType type, llvm::Value *NRVOFlag)
-        : NRVOFlag(NRVOFlag), Loc(addr), Ty(type) {}
+    DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag)
+        : NRVOFlag(NRVOFlag), Loc(addr) {}
 
     llvm::Value *NRVOFlag;
     Address Loc;
-    QualType Ty;
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
       // Along the exceptions path we always execute the dtor.
@@ -512,24 +511,26 @@ namespace {
 
   struct DestroyNRVOVariableCXX final
       : DestroyNRVOVariable<DestroyNRVOVariableCXX> {
-    DestroyNRVOVariableCXX(Address addr, QualType type,
-                           const CXXDestructorDecl *Dtor, llvm::Value *NRVOFlag)
-        : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, type, NRVOFlag),
-          Dtor(Dtor) {}
+    DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor,
+                           llvm::Value *NRVOFlag)
+      : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, NRVOFlag),
+        Dtor(Dtor) {}
 
     const CXXDestructorDecl *Dtor;
 
     void emitDestructorCall(CodeGenFunction &CGF) {
       CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
                                 /*ForVirtualBase=*/false,
-                                /*Delegating=*/false, Loc, Ty);
+                                /*Delegating=*/false, Loc);
     }
   };
 
   struct DestroyNRVOVariableC final
       : DestroyNRVOVariable<DestroyNRVOVariableC> {
     DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty)
-        : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, Ty, NRVOFlag) {}
+        : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, NRVOFlag), Ty(Ty) {}
+
+    QualType Ty;
 
     void emitDestructorCall(CodeGenFunction &CGF) {
       CGF.destroyNonTrivialCStruct(CGF, Loc, Ty);
@@ -1939,7 +1940,7 @@ void CodeGenFunction::emitAutoVarTypeCleanup(
     if (emission.NRVOFlag) {
       assert(!type->isArrayType());
       CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor();
-      EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, type, dtor,
+      EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, dtor,
                                                   emission.NRVOFlag);
       return;
     }
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 5476d13b7c461..8ad229fc0c362 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -10,13 +10,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "CodeGenFunction.h"
 #include "CGCUDARuntime.h"
 #include "CGCXXABI.h"
 #include "CGDebugInfo.h"
 #include "CGObjCRuntime.h"
-#include "CodeGenFunction.h"
 #include "ConstantEmitter.h"
-#include "TargetInfo.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "llvm/IR/Intrinsics.h"
@@ -91,26 +90,12 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall(
 }
 
 RValue CodeGenFunction::EmitCXXDestructorCall(
-    GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This, QualType ThisTy,
+    GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This,
     llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE) {
-  const CXXMethodDecl *DtorDecl = cast<CXXMethodDecl>(Dtor.getDecl());
-
-  assert(!ThisTy.isNull());
-  assert(ThisTy->getAsCXXRecordDecl() == DtorDecl->getParent() &&
-         "Pointer/Object mixup");
-
-  LangAS SrcAS = ThisTy.getAddressSpace();
-  LangAS DstAS = DtorDecl->getMethodQualifiers().getAddressSpace();
-  if (SrcAS != DstAS) {
-    QualType DstTy = DtorDecl->getThisType();
-    llvm::Type *NewType = CGM.getTypes().ConvertType(DstTy);
-    This = getTargetHooks().performAddrSpaceCast(*this, This, SrcAS, DstAS,
-                                                 NewType);
-  }
-
   CallArgList Args;
-  commonEmitCXXMemberOrOperatorCall(*this, DtorDecl, This, ImplicitParam,
-                                    ImplicitParamTy, CE, Args, nullptr);
+  commonEmitCXXMemberOrOperatorCall(*this, cast<CXXMethodDecl>(Dtor.getDecl()),
+                                    This, ImplicitParam, ImplicitParamTy, CE,
+                                    Args, nullptr);
   return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee,
                   ReturnValueSlot(), Args);
 }
@@ -360,9 +345,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
         Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(GD, Ty), GD);
       }
 
-      QualType ThisTy =
-          IsArrow ? Base->getType()->getPointeeType() : Base->getType();
-      EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
+      EmitCXXDestructorCall(GD, Callee, This.getPointer(),
                             /*ImplicitParam=*/nullptr,
                             /*ImplicitParamTy=*/QualType(), nullptr);
     }
@@ -1900,7 +1883,7 @@ static void EmitObjectDelete(CodeGenFunction &CGF,
     CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete,
                               /*ForVirtualBase=*/false,
                               /*Delegating=*/false,
-                              Ptr, ElementType);
+                              Ptr);
   else if (auto Lifetime = ElementType.getObjCLifetime()) {
     switch (Lifetime) {
     case Qualifiers::OCL_None:
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index c3060d1fb3514..06ef2dff7e9f5 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -675,8 +675,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// PushDestructorCleanup - Push a cleanup to call the
   /// complete-object variant of the given destructor on the object at
   /// the given address.
-  void PushDestructorCleanup(const CXXDestructorDecl *Dtor, QualType T,
-                             Address Addr);
+  void PushDestructorCleanup(const CXXDestructorDecl *Dtor, Address Addr);
 
   /// PopCleanupBlock - Will pop the cleanup entry on the stack and
   /// process all branch fixups.
@@ -2555,8 +2554,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   static Destroyer destroyCXXObject;
 
   void EmitCXXDestructorCall(const CXXDestructorDecl *D, CXXDtorType Type,
-                             bool ForVirtualBase, bool Delegating, Address This,
-                             QualType ThisTy);
+                             bool ForVirtualBase, bool Delegating,
+                             Address This);
 
   void EmitNewArrayInitializer(const CXXNewExpr *E, QualType elementType,
                                llvm::Type *ElementTy, Address NewPtr,
@@ -3678,9 +3677,9 @@ class CodeGenFunction : public CodeGenTypeCache {
                               llvm::Value *ImplicitParam,
                               QualType ImplicitParamTy, const CallExpr *E,
                               CallArgList *RtlArgs);
-  RValue EmitCXXDestructorCall(GlobalDecl Dtor, const CGCallee &Callee,
-                               llvm::Value *This, QualType ThisTy,
-                               llvm::Value *ImplicitParam,
+  RValue EmitCXXDestructorCall(GlobalDecl Dtor,
+                               const CGCallee &Callee,
+                               llvm::Value *This, llvm::Value *ImplicitParam,
                                QualType ImplicitParamTy, const CallExpr *E);
   RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E,
                                ReturnValueSlot ReturnValue);
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 3b2413d960d63..7367ff37cf45c 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -224,8 +224,7 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
 
   void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
                           CXXDtorType Type, bool ForVirtualBase,
-                          bool Delegating, Address This,
-                          QualType ThisTy) override;
+                          bool Delegating, Address This) override;
 
   void emitVTableDefinitions(CodeGenVTables &CGVT,
                              const CXXRecordDecl *RD) override;
@@ -262,8 +261,9 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
 
   llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
                                          const CXXDestructorDecl *Dtor,
-                                         CXXDtorType DtorType, Address This,
-                                         DeleteOrMemberCallExpr E) override;
+                                         CXXDtorType DtorType,
+                                         Address This,
+                                         const CXXMemberCallExpr *CE) override;
 
   void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
 
@@ -1128,7 +1128,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
   // FIXME: Provide a source location here even though there's no
   // CXXMemberCallExpr for dtor call.
   CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
-  EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE);
+  EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
 
   if (UseGlobalDelete)
     CGF.PopCleanupBlock();
@@ -1539,8 +1539,7 @@ CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs(
 void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
                                        const CXXDestructorDecl *DD,
                                        CXXDtorType Type, bool ForVirtualBase,
-                                       bool Delegating, Address This,
-                                       QualType ThisTy) {
+                                       bool Delegating, Address This) {
   GlobalDecl GD(DD, Type);
   llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating);
   QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
@@ -1552,8 +1551,7 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
   else
     Callee = CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD);
 
-  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, VTT, VTTTy,
-                            nullptr);
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), VTT, VTTTy, nullptr);
 }
 
 void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -1741,10 +1739,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
 
 llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
     CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
-    Address This, DeleteOrMemberCallExpr E) {
-  auto *CE = E.dyn_cast<const CXXMemberCallExpr *>();
-  auto *D = E.dyn_cast<const CXXDeleteExpr *>();
-  assert((CE != nullptr) ^ (D != nullptr));
+    Address This, const CXXMemberCallExpr *CE) {
   assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
   assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
 
@@ -1754,14 +1749,8 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
   llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
   CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);
 
-  QualType ThisTy;
-  if (CE)
-    ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
-  else
-    ThisTy = D->getDestroyedType();
-
-  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, nullptr,
-                            QualType(), nullptr);
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), nullptr, QualType(),
+                            nullptr);
   return nullptr;
 }
 
diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
index fa34414de5da1..a91a949d024f8 100644
--- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -258,8 +258,7 @@ class MicrosoftCXXABI : public CGCXXABI {
 
   void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
                           CXXDtorType Type, bool ForVirtualBase,
-                          bool Delegating, Address This,
-                          QualType ThisTy) override;
+                          bool Delegating, Address This) override;
 
   void emitVTableTypeMetadata(const VPtrInfo &Info, const CXXRecordDecl *RD,
                               llvm::GlobalVariable *VTable);
@@ -297,8 +296,9 @@ class MicrosoftCXXABI : public CGCXXABI {
 
   llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
                                          const CXXDestructorDecl *Dtor,
-                                         CXXDtorType DtorType, Address This,
-                                         DeleteOrMemberCallExpr E) override;
+                                         CXXDtorType DtorType,
+                                         Address This,
+                                         const CXXMemberCallExpr *CE) override;
 
   void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD,
                                         CallArgList &CallArgs) override {
@@ -844,7 +844,8 @@ void MicrosoftCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
   // CXXMemberCallExpr for dtor call.
   bool UseGlobalDelete = DE->isGlobalDelete();
   CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
-  llvm::Value *MDThis = EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE);
+  llvm::Value *MDThis =
+      EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
   if (UseGlobalDelete)
     CGF.EmitDeleteCall(DE->getOperatorDelete(), MDThis, ElementType);
 }
@@ -1568,8 +1569,7 @@ CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
 void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
                                          const CXXDestructorDecl *DD,
                                          CXXDtorType Type, bool ForVirtualBase,
-                                         bool Delegating, Address This,
-                                         QualType ThisTy) {
+                                         bool Delegating, Address This) {
   // Use the base destructor variant in place of the complete destructor variant
   // if the class has no virtual bases. This effectively implements some of the
   // -mconstructor-aliases optimization, but as part of the MS C++ ABI.
@@ -1591,7 +1591,7 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
     BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF);
   }
 
-  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(),
                             /*ImplicitParam=*/nullptr,
                             /*ImplicitParamTy=*/QualType(), nullptr);
   if (BaseDtorEndBB) {
@@ -1900,10 +1900,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
 
 llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
     CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
-    Address This, DeleteOrMemberCallExpr E) {
-  auto *CE = E.dyn_cast<const CXXMemberCallExpr *>();
-  auto *D = E.dyn_cast<const CXXDeleteExpr *>();
-  assert((CE != nullptr) ^ (D != nullptr));
+    Address This, const CXXMemberCallExpr *CE) {
   assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
   assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);
 
@@ -1920,14 +1917,8 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
       llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
       DtorType == Dtor_Deleting);
 
-  QualType ThisTy;
-  if (CE)
-    ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
-  else
-    ThisTy = D->getDestroyedType();
-
   This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
-  RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
+  RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(),
                                         ImplicitParam, Context.IntTy, CE);
   return RV.getScalarVal();
 }
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 9a6385f283196..dd77fc55721fb 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -8190,27 +8190,6 @@ void Sema::ActOnFinishDelayedCXXMethodDeclaration(Scope *S, Decl *MethodD) {
     CheckCXXDefaultArguments(Method);
 }
 
-// Emit the given diagnostic for each non-address-space qualifier.
-// Common part of CheckConstructorDeclarator and CheckDestructorDeclarator.
-static void checkMethodTypeQualifiers(Sema &S, Declarator &D, unsigned DiagID) {
-  const DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
-  if (FTI.hasMethodTypeQualifiers() && !D.isInvalidType()) {
-    bool DiagOccured = false;
-    FTI.MethodQualifiers->forEachQualifier(
-        [DiagID, &S, &DiagOccured](DeclSpec::TQ, StringRef QualName,
-                                   SourceLocation SL) {
-          // This diagnostic should be emitted on any qualifier except an addr
-          // space qualifier. However, forEachQualifier currently doesn't visit
-          // addr space qualifiers, so there's no way to write this condition
-          // right now; we just diagnose on everything.
-          S.Diag(SL, DiagID) << QualName << SourceRange(SL);
-          DiagOccured = true;
-        });
-    if (DiagOccured)
-      D.setInvalidType();
-  }
-}
-
 /// CheckConstructorDeclarator - Called by ActOnDeclarator to check
 /// the well-formedness of the constructor declarator @p D with type @p
 /// R. If there are any errors in the declarator, this routine will
@@ -8251,11 +8230,25 @@ QualType Sema::CheckConstructorDeclarator(Declarator &D, QualType R,
     D.setInvalidType();
   }
 
-  checkMethodTypeQualifiers(*this, D, diag::err_invalid_qualified_constructor);
+  DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
+  if (FTI.hasMethodTypeQualifiers()) {
+    bool DiagOccured = false;
+    FTI.MethodQualifiers->forEachQualifier(
+        [&](DeclSpec::TQ TypeQual, StringRef QualName, SourceLocation SL) {
+          // This diagnostic should be emitted on any qualifier except an addr
+          // space qualifier. However, forEachQualifier currently doesn't visit
+          // addr space qualifiers, so there's no way to write this condition
+          // right now; we just diagnose on everything.
+          Diag(SL, diag::err_invalid_qualified_constructor)
+              << QualName << SourceRange(SL);
+          DiagOccured = true;
+        });
+    if (DiagOccured)
+      D.setInvalidType();
+  }
 
   // C++0x [class.ctor]p4:
   //   A constructor shall not be declared with a ref-qualifier.
-  DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
   if (FTI.hasRefQualifier()) {
     Diag(FTI.getRefQualifierLoc(), diag::err_ref_qualifier_constructor)
       << FTI.RefQualifierIsLValueRef
@@ -8430,11 +8423,18 @@ QualType Sema::CheckDestructorDeclarator(Declarator &D, QualType R,
     }
   }
 
-  checkMethodTypeQualifiers(*this, D, diag::err_invalid_qualified_destructor);
+  DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
+  if (FTI.hasMethodTypeQualifiers() && !D.isInvalidType()) {
+    FTI.MethodQualifiers->forEachQualifier(
+        [&](DeclSpec::TQ TypeQual, StringRef QualName, SourceLocation SL) {
+          Diag(SL, diag::err_invalid_qualified_destructor)
+              << QualName << SourceRange(SL);
+        });
+    D.setInvalidType();
+  }
 
   // C++0x [class.dtor]p2:
   //   A destructor shall not be declared with a ref-qualifier.
-  DeclaratorChunk::FunctionTypeInfo &FTI = D.getFunctionTypeInfo();
   if (FTI.hasRefQualifier()) {
     Diag(FTI.getRefQualifierLoc(), diag::err_ref_qualifier_destructor)
       << FTI.RefQualifierIsLValueRef
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index f632a4d3bd1a7..d8c4ea48ebce7 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -5093,10 +5093,12 @@ TryObjectArgumentInitialization(Sema &S, SourceLocation Loc, QualType FromType,
   QualType ClassType = S.Context.getTypeDeclType(ActingContext);
   // [class.dtor]p2: A destructor can be invoked for a const, volatile or
   //                 const volatile object.
-  Qualifiers Quals = Method->getMethodQualifiers();
+  Qualifiers Quals;
   if (isa<CXXDestructorDecl>(Method)) {
     Quals.addConst();
     Quals.addVolatile();
+  } else {
+    Quals = Method->getMethodQualifiers();
   }
 
   QualType ImplicitParamType = S.Context.getQualifiedType(ClassType, Quals);
diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl b/clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl
new file mode 100644
index 0000000000000..42c2e6e9077a8
--- /dev/null
+++ b/clang/test/CodeGenOpenCLCXX/addrspace-ctor.cl
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -emit-llvm -O0 -o - | FileCheck %s
+
+struct MyType {
+  MyType(int i) : i(i) {}
+  MyType(int i) __constant : i(i) {}
+  int i;
+};
+
+//CHECK: call void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* @const1, i32 1)
+__constant MyType const1 = 1;
+//CHECK: call void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* @const2, i32 2)
+__constant MyType const2(2);
+//CHECK: call void @_ZNU3AS46MyTypeC1Ei(%struct.MyType addrspace(4)* addrspacecast (%struct.MyType addrspace(1)* @glob to %struct.MyType addrspace(4)*), i32 1)
+MyType glob(1);
diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-with-class.cl b/clang/test/CodeGenOpenCLCXX/addrspace-with-class.cl
deleted file mode 100644
index 21ba1ca251d86..0000000000000
--- a/clang/test/CodeGenOpenCLCXX/addrspace-with-class.cl
+++ /dev/null
@@ -1,59 +0,0 @@
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -emit-llvm -O0 -o - | FileCheck %s
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=c++ -emit-llvm -O0 -o - | FileCheck %s --check-prefix=CHECK-DEFINITIONS
-
-// This test ensures the proper address spaces and address space cast are used
-// for constructors, member functions and destructors.
-// See also atexit.cl and global_init.cl for other specific tests.
-
-// CHECK: %struct.MyType = type { i32 }
-struct MyType {
-  MyType(int i) : i(i) {}
-  MyType(int i) __constant : i(i) {}
-  ~MyType() {}
-  ~MyType() __constant {}
-  int bar() { return i + 2; }
-  int bar() __constant { return i + 1; }
-  int i;
-};
-
-// CHECK: @const1 = addrspace(2) global %struct.MyType zeroinitializer
-__constant MyType const1 = 1;
-// CHECK: @const2 = addrspace(2) global %struct.MyType zeroinitializer
-__constant MyType const2(2);
-// CHECK: @glob = addrspace(1) global %struct.MyType zeroinitializer
-MyType glob(1);
-
-// CHECK: call void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* @const1, i32 1)
-// CHECK: call void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* @const2, i32 2)
-// CHECK: call void @_ZNU3AS46MyTypeC1Ei(%struct.MyType addrspace(4)* addrspacecast (%struct.MyType addrspace(1)* @glob to %struct.MyType addrspace(4)*), i32 1)
-
-// CHECK-LABEL: define spir_kernel void @fooGlobal()
-kernel void fooGlobal() {
-  // CHECK: call i32 @_ZNU3AS46MyType3barEv(%struct.MyType addrspace(4)* addrspacecast (%struct.MyType addrspace(1)* @glob to %struct.MyType addrspace(4)*))
-  glob.bar();
-  // CHECK: call i32 @_ZNU3AS26MyType3barEv(%struct.MyType addrspace(2)* @const1)
-  const1.bar();
-  // CHECK: call void @_ZNU3AS26MyTypeD1Ev(%struct.MyType addrspace(2)* @const1)
-  const1.~MyType();
-}
-
-// CHECK-LABEL: define spir_kernel void @fooLocal()
-kernel void fooLocal() {
-  // CHECK: [[VAR:%.*]] = alloca %struct.MyType
-  // CHECK: [[REG:%.*]] = addrspacecast %struct.MyType* [[VAR]] to %struct.MyType addrspace(4)*
-  // CHECK: call void @_ZNU3AS46MyTypeC1Ei(%struct.MyType addrspace(4)* [[REG]], i32 3)
-  MyType myLocal(3);
-  // CHECK: [[REG:%.*]] = addrspacecast %struct.MyType* [[VAR]] to %struct.MyType addrspace(4)*
-  // CHECK: call i32 @_ZNU3AS46MyType3barEv(%struct.MyType addrspace(4)* [[REG]])
-  myLocal.bar();
-  // CHECK: [[REG:%.*]] = addrspacecast %struct.MyType* [[VAR]] to %struct.MyType addrspace(4)*
-  // CHECK: call void @_ZNU3AS46MyTypeD1Ev(%struct.MyType addrspace(4)* [[REG]])
-}
-
-// Ensure all members are defined for all the required address spaces.
-// CHECK-DEFINITIONS-DAG: define linkonce_odr void @_ZNU3AS26MyTypeC1Ei(%struct.MyType addrspace(2)* %this, i32 %i)
-// CHECK-DEFINITIONS-DAG: define linkonce_odr void @_ZNU3AS46MyTypeC1Ei(%struct.MyType addrspace(4)* %this, i32 %i)
-// CHECK-DEFINITIONS-DAG: define linkonce_odr void @_ZNU3AS26MyTypeD1Ev(%struct.MyType addrspace(2)* %this)
-// CHECK-DEFINITIONS-DAG: define linkonce_odr void @_ZNU3AS46MyTypeD1Ev(%struct.MyType addrspace(4)* %this)
-// CHECK-DEFINITIONS-DAG: define linkonce_odr i32 @_ZNU3AS26MyType3barEv(%struct.MyType addrspace(2)* %this)
-// CHECK-DEFINITIONS-DAG: define linkonce_odr i32 @_ZNU3AS46MyType3barEv(%struct.MyType addrspace(4)* %this)

From 7c86e5b06ae54668c1616aa72a47c1ee490b8677 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Fri, 19 Jul 2019 09:38:24 +0000
Subject: [PATCH 449/451] Merging r366433:
 ------------------------------------------------------------------------
 r366433 | stefan.graenitz | 2019-07-18 15:30:37 +0200 (Thu, 18 Jul 2019) | 19
 lines

[CMake] Always build debugserver on Darwin and allow tests to use the system's one

Summary:
We can always build debugserver, but we can't always sign it to be useable for testing. `LLDB_USE_SYSTEM_DEBUGSERVER` should only tell whether or not the system debugserver should be used for testing.
The old behavior complicated the logic around debugserver a lot. The new logic sorts out most of it.

Please note that this patch is in an early stage and needs some more testing. It should not affect platforms other than Darwin. It builds on Davide's approach to validate the code-signing identity at configuration time.

What do you think?

Reviewers: xiaobai, JDevlieghere, davide, compnerd, friss, labath, mgorny, jasonmolenda

Reviewed By: JDevlieghere

Subscribers: lldb-commits, #lldb

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D64806
------------------------------------------------------------------------

llvm-svn: 366553
---
 lldb/CMakeLists.txt                           |   4 -
 lldb/cmake/modules/AddLLDB.cmake              |  24 +++
 lldb/cmake/modules/LLDBConfig.cmake           |   1 +
 lldb/test/CMakeLists.txt                      |  35 ++--
 lldb/tools/debugserver/source/CMakeLists.txt  | 185 ++++++------------
 lldb/unittests/CMakeLists.txt                 |   2 +-
 .../tools/lldb-server/CMakeLists.txt          |   9 +-
 7 files changed, 115 insertions(+), 145 deletions(-)

diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt
index 6b1b67258b3db..609aa0bd2a606 100644
--- a/lldb/CMakeLists.txt
+++ b/lldb/CMakeLists.txt
@@ -107,10 +107,6 @@ if(LLDB_INCLUDE_TESTS)
     list(APPEND LLDB_TEST_DEPS lldb-server)
   endif()
 
-  if(TARGET debugserver)
-    list(APPEND LLDB_TEST_DEPS debugserver)
-  endif()
-
   if(TARGET lldb-mi)
     list(APPEND LLDB_TEST_DEPS lldb-mi)
   endif()
diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake
index 540d01362f0d8..4c99278c583bc 100644
--- a/lldb/cmake/modules/AddLLDB.cmake
+++ b/lldb/cmake/modules/AddLLDB.cmake
@@ -276,3 +276,27 @@ function(lldb_setup_rpaths name)
     INSTALL_RPATH "${LIST_INSTALL_RPATH}"
   )
 endfunction()
+
+function(lldb_find_system_debugserver path)
+  execute_process(COMMAND xcode-select -p
+                  RESULT_VARIABLE exit_code
+                  OUTPUT_VARIABLE xcode_dev_dir
+                  ERROR_VARIABLE error_msg
+                  OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if(exit_code)
+    message(WARNING "`xcode-select -p` failed:\n${error_msg}")
+  else()
+    set(subpath "LLDB.framework/Resources/debugserver")
+    set(path_shared "${xcode_dev_dir}/../SharedFrameworks/${subpath}")
+    set(path_private "${xcode_dev_dir}/Library/PrivateFrameworks/${subpath}")
+
+    if(EXISTS ${path_shared})
+      set(${path} ${path_shared} PARENT_SCOPE)
+    elseif(EXISTS ${path_private})
+      set(${path} ${path_private} PARENT_SCOPE)
+    else()
+      message(WARNING "System debugserver requested, but not found. "
+                      "Candidates don't exist: ${path_shared}\n${path_private}")
+    endif()
+  endif()
+endfunction()
diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index ac71136dd026f..b9b1ccec2c675 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -50,6 +50,7 @@ option(LLDB_USE_SYSTEM_SIX "Use six.py shipped with system and do not install a
 option(LLDB_USE_ENTITLEMENTS "When codesigning, use entitlements if available" ON)
 option(LLDB_BUILD_FRAMEWORK "Build LLDB.framework (Darwin only)" OFF)
 option(LLDB_NO_INSTALL_DEFAULT_RPATH "Disable default RPATH settings in binaries" OFF)
+option(LLDB_USE_SYSTEM_DEBUGSERVER "Use the system's debugserver for testing (Darwin only)." OFF)
 
 if(LLDB_BUILD_FRAMEWORK)
   if(NOT APPLE)
diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt
index 037222e04477b..00225e7915263 100644
--- a/lldb/test/CMakeLists.txt
+++ b/lldb/test/CMakeLists.txt
@@ -74,15 +74,6 @@ if ( CMAKE_SYSTEM_NAME MATCHES "Windows" )
   endif()
 endif()
 
-if(LLDB_CODESIGN_IDENTITY_USED)
-  list(APPEND LLDB_TEST_COMMON_ARGS --codesign-identity "${LLDB_CODESIGN_IDENTITY_USED}")
-endif()
-
-if(LLDB_BUILD_FRAMEWORK)
-  get_target_property(framework_target_dir liblldb LIBRARY_OUTPUT_DIRECTORY)
-  list(APPEND LLDB_TEST_COMMON_ARGS --framework ${framework_target_dir}/LLDB.framework)
-endif()
-
 if (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Windows|Darwin")
   list(APPEND LLDB_TEST_COMMON_ARGS
     --env ARCHIVER=${CMAKE_AR} --env OBJCOPY=${CMAKE_OBJCOPY})
@@ -94,12 +85,28 @@ if (NOT "${LLDB_LIT_TOOLS_DIR}" STREQUAL "")
   endif()
 endif()
 
-if(CMAKE_HOST_APPLE AND DEBUGSERVER_PATH)
-  list(APPEND LLDB_TEST_COMMON_ARGS --server ${DEBUGSERVER_PATH})
-endif()
+if(CMAKE_HOST_APPLE)
+  if(LLDB_BUILD_FRAMEWORK)
+    get_target_property(framework_build_dir liblldb LIBRARY_OUTPUT_DIRECTORY)
+    list(APPEND LLDB_TEST_COMMON_ARGS --framework ${framework_build_dir}/LLDB.framework)
+  endif()
+
+  # Use the same identity for testing
+  get_property(code_sign_identity_used GLOBAL PROPERTY LLDB_DEBUGSERVER_CODESIGN_IDENTITY)
+  if(code_sign_identity_used)
+    list(APPEND LLDB_TEST_COMMON_ARGS --codesign-identity "${code_sign_identity_used}")
+  endif()
 
-if(SKIP_TEST_DEBUGSERVER)
-  list(APPEND LLDB_TEST_COMMON_ARGS --out-of-tree-debugserver)
+  if(LLDB_USE_SYSTEM_DEBUGSERVER)
+    lldb_find_system_debugserver(system_debugserver_path)
+    message(STATUS "LLDB tests use out-of-tree debugserver: ${system_debugserver_path}")
+    list(APPEND LLDB_TEST_COMMON_ARGS --out-of-tree-debugserver)
+  else()
+    set(debugserver_path ${LLVM_RUNTIME_OUTPUT_INTDIR}/debugserver)
+    message(STATUS "LLDB Tests use just-built debugserver: ${debugserver_path}")
+    list(APPEND LLDB_TEST_COMMON_ARGS --server ${debugserver_path})
+    add_dependencies(lldb-test-deps debugserver)
+  endif()
 endif()
 
 set(LLDB_DOTEST_ARGS ${LLDB_TEST_COMMON_ARGS};${LLDB_TEST_USER_ARGS})
diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt
index cf305c9d26803..303fd28caf6ce 100644
--- a/lldb/tools/debugserver/source/CMakeLists.txt
+++ b/lldb/tools/debugserver/source/CMakeLists.txt
@@ -6,6 +6,58 @@ include_directories(MacOSX/DarwinLog)
 
 include_directories(MacOSX)
 
+function(check_certificate identity result_valid)
+  execute_process(
+    COMMAND security find-certificate -Z -p -c ${identity} /Library/Keychains/System.keychain
+    RESULT_VARIABLE exit_code OUTPUT_QUIET ERROR_QUIET)
+  if(exit_code)
+    set(${result_valid} FALSE PARENT_SCOPE)
+  else()
+    set(${result_valid} TRUE PARENT_SCOPE)
+  endif()
+endfunction()
+
+function(get_debugserver_codesign_identity result) # Sets <result> in the caller's scope to the identity used to sign debugserver.
+  string(CONCAT not_found_help # Shared tail of the warnings below; CONCAT joins the pieces verbatim, hence the trailing spaces.
+    "This will cause failures in the test suite. "
+    "Pass '-DLLDB_USE_SYSTEM_DEBUGSERVER=ON' to use the system one instead. "
+    "See 'Code Signing on macOS' in the documentation."
+  )
+
+  # Explicit override: warn if unavailable
+  if(LLDB_CODESIGN_IDENTITY)
+    set(${result} ${LLDB_CODESIGN_IDENTITY} PARENT_SCOPE)
+    check_certificate(${LLDB_CODESIGN_IDENTITY} available)
+    if(NOT available AND NOT LLDB_USE_SYSTEM_DEBUGSERVER)
+      message(WARNING "LLDB_CODESIGN_IDENTITY not found: '${LLDB_CODESIGN_IDENTITY}' ${not_found_help}")
+    endif()
+    return()
+  endif()
+
+  # Development signing identity: use if available
+  check_certificate(lldb_codesign available)
+  if(available)
+    set(${result} lldb_codesign PARENT_SCOPE)
+    return()
+  endif()
+
+  if(NOT LLDB_USE_SYSTEM_DEBUGSERVER) # No warning when tests use the system debugserver anyway.
+    message(WARNING "Development code sign identity not found: 'lldb_codesign' ${not_found_help}")
+  endif()
+
+  # LLVM counterpart: fallback if available
+  if(LLVM_CODESIGNING_IDENTITY)
+    check_certificate(${LLVM_CODESIGNING_IDENTITY} available)
+    if(available)
+      set(${result} ${LLVM_CODESIGNING_IDENTITY} PARENT_SCOPE)
+      return()
+    endif()
+  endif()
+
+  # Ad-hoc signing: last resort
+  set(${result} "-" PARENT_SCOPE)
+endfunction()
+
 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -Wl,-sectcreate,__TEXT,__info_plist,${CMAKE_CURRENT_SOURCE_DIR}/../resources/lldb-debugserver-Info.plist")
 
 check_cxx_compiler_flag("-Wno-gnu-zero-variadic-macro-arguments"
@@ -30,132 +82,17 @@ check_library_exists(compression compression_encode_buffer "" HAVE_LIBCOMPRESSIO
 
 add_subdirectory(MacOSX)
 
-# LLDB-specific identity, currently used for code signing debugserver.
 set(LLDB_CODESIGN_IDENTITY "" CACHE STRING
-    "Override code sign identity for debugserver and for use in tests; falls back to LLVM_CODESIGNING_IDENTITY if set or lldb_codesign otherwise (Darwin only)")
-
-# Determine which identity to use and store it in the separate cache entry.
-# We will query it later for LLDB_TEST_COMMON_ARGS.
-if(LLDB_CODESIGN_IDENTITY)
-  set(LLDB_CODESIGN_IDENTITY_USED ${LLDB_CODESIGN_IDENTITY} CACHE INTERNAL "" FORCE)
-elseif(LLVM_CODESIGNING_IDENTITY)
-  set(LLDB_CODESIGN_IDENTITY_USED ${LLVM_CODESIGNING_IDENTITY} CACHE INTERNAL "" FORCE)
-else()
-  set(LLDB_CODESIGN_IDENTITY_USED lldb_codesign CACHE INTERNAL "" FORCE)
-endif()
+    "Identity override for debugserver; see 'Code Signing on macOS' in the documentation (Darwin only)")
 
-# Override locally, so the identity is used for targets created in this scope.
-set(LLVM_CODESIGNING_IDENTITY ${LLDB_CODESIGN_IDENTITY_USED})
-
-option(LLDB_NO_DEBUGSERVER "Disable the debugserver target" OFF)
-option(LLDB_USE_SYSTEM_DEBUGSERVER "Use the system's debugserver instead of building it from source (Darwin only)." OFF)
+get_debugserver_codesign_identity(debugserver_codesign_identity)
 
-# Incompatible options
-if(LLDB_NO_DEBUGSERVER AND LLDB_USE_SYSTEM_DEBUGSERVER)
-  message(FATAL_ERROR "Inconsistent options: LLDB_NO_DEBUGSERVER and LLDB_USE_SYSTEM_DEBUGSERVER")
-endif()
-
-# Try to locate the system debugserver.
-# Subsequent feasibility checks depend on it.
-if(APPLE AND CMAKE_HOST_APPLE)
-  execute_process(
-    COMMAND xcode-select -p
-    OUTPUT_VARIABLE xcode_dev_dir)
-  string(STRIP ${xcode_dev_dir} xcode_dev_dir)
-
-  set(debugserver_rel_path "LLDB.framework/Resources/debugserver")
-  set(debugserver_shared "${xcode_dev_dir}/../SharedFrameworks/${debugserver_rel_path}")
-  set(debugserver_private "${xcode_dev_dir}/Library/PrivateFrameworks/${debugserver_rel_path}")
-
-  if(EXISTS ${debugserver_shared})
-    set(system_debugserver ${debugserver_shared})
-  elseif(EXISTS ${debugserver_private})
-    set(system_debugserver ${debugserver_private})
-  endif()
-endif()
-
-# Handle unavailability
-if(LLDB_USE_SYSTEM_DEBUGSERVER)
-  if(system_debugserver)
-    set(use_system_debugserver ON)
-  elseif(APPLE AND CMAKE_HOST_APPLE)
-    # Binary not found on system. Keep cached variable, to try again on reconfigure.
-    message(SEND_ERROR
-      "LLDB_USE_SYSTEM_DEBUGSERVER option set, but no debugserver found in:\
-        ${debugserver_shared}\
-        ${debugserver_private}")
-  else()
-    # Non-Apple target platform or non-Darwin host. Reset invalid cached variable.
-    message(WARNING "Reverting invalid option LLDB_USE_SYSTEM_DEBUGSERVER (Darwin only)")
-    set(LLDB_USE_SYSTEM_DEBUGSERVER OFF CACHE BOOL "" FORCE)
-  endif()
-elseif(NOT LLDB_NO_DEBUGSERVER)
-  # Default case: on Darwin we need the right code signing ID.
-  # See lldb/docs/code-signing.txt for details.
-  if(CMAKE_HOST_APPLE AND NOT LLVM_CODESIGNING_IDENTITY STREQUAL "lldb_codesign")
-    message(WARNING "Codesigning debugserver with identity ${LLVM_CODESIGNING_IDENTITY}. "
-                    "The usual setup uses the \"lldb_codesign\" identity created with "
-                    "scripts/macos-setup-codesign.sh. As a result your debugserver might "
-                    "not be able to attach to processes.\n"
-                    "Pass -DLLDB_CODESIGN_IDENTITY=lldb_codesign to use the development "
-                    "signing identity.")
-  endif()
-  set(build_and_sign_debugserver ON)
-endif()
-
-# TODO: We don't use the $<TARGET_FILE:debugserver> generator expression here,
-# because the value of DEBUGSERVER_PATH is used to build LLDB_DOTEST_ARGS,
-# which is used for configuring lldb-dotest.in, which does not have a generator
-# step at the moment.
-set(default_debugserver_path "${LLVM_RUNTIME_OUTPUT_INTDIR}/debugserver${CMAKE_EXECUTABLE_SUFFIX}")
-
-# Remember where debugserver binary goes and whether or not we have to test it.
-set(DEBUGSERVER_PATH "" CACHE FILEPATH "Path to debugserver")
-set(SKIP_TEST_DEBUGSERVER OFF CACHE BOOL "Building the in-tree debugserver was skipped")
-
-# Reset values in all cases in order to correctly support reconfigurations.
-if(use_system_debugserver)
-  add_custom_target(debugserver
-    COMMAND ${CMAKE_COMMAND} -E copy_if_different
-            ${system_debugserver} ${LLVM_RUNTIME_OUTPUT_INTDIR}
-    COMMENT "Copying the system debugserver to LLDB's binaries directory.")
-
-  set_target_properties(debugserver PROPERTIES FOLDER "lldb libraries/debugserver")
-
-  # Don't test debugserver itself.
-  # Tests that require debugserver will use the copy.
-  set(DEBUGSERVER_PATH ${default_debugserver_path} CACHE FILEPATH "" FORCE)
-  set(SKIP_TEST_DEBUGSERVER ON CACHE BOOL "" FORCE)
-
-  message(STATUS "Copy system debugserver from: ${system_debugserver}")
-elseif(build_and_sign_debugserver)
-  # Build, sign and test debugserver (below)
-  set(DEBUGSERVER_PATH ${default_debugserver_path} CACHE FILEPATH "" FORCE)
-  set(SKIP_TEST_DEBUGSERVER OFF CACHE BOOL "" FORCE)
-
-  message(STATUS "lldb debugserver: ${DEBUGSERVER_PATH}")
-else()
-  # No tests for debugserver, no tests that require it.
-  set(DEBUGSERVER_PATH "" CACHE FILEPATH "" FORCE)
-  set(SKIP_TEST_DEBUGSERVER ON CACHE BOOL "" FORCE)
-
-  message(STATUS "lldb debugserver will not be available.")
-endif()
+# Override locally, so the identity is used for targets created in this scope.
+set(LLVM_CODESIGNING_IDENTITY ${debugserver_codesign_identity})
 
-# On MacOS, debugserver needs to be codesigned when built. Check if we have
-# a certificate instead of failing in the middle of the build.
-if(build_and_sign_debugserver)
-  execute_process(
-    COMMAND security find-certificate -Z -p -c ${LLDB_CODESIGN_IDENTITY_USED} /Library/Keychains/System.keychain
-    RESULT_VARIABLE cert_return
-    OUTPUT_QUIET
-    ERROR_QUIET)
-
-  if (cert_return)
-    message(FATAL_ERROR "Certificate for debugserver not found. Run scripts/macos-setup-codesign.sh or "
-                        "use the system debugserver passing -DLLDB_USE_SYSTEM_DEBUGSERVER=ON to CMake")
-  endif()
-endif()
+# Use the same identity later in the test suite.
+set_property(GLOBAL PROPERTY
+  LLDB_DEBUGSERVER_CODESIGN_IDENTITY ${debugserver_codesign_identity})
 
 if(APPLE)
   if(IOS)
@@ -190,7 +127,7 @@ if(LLDB_USE_ENTITLEMENTS)
   endif()
 endif()
 
-if(build_and_sign_debugserver)
+#if(build_and_sign_debugserver)
   set(generated_mach_interfaces
     ${CMAKE_CURRENT_BINARY_DIR}/mach_exc.h
     ${CMAKE_CURRENT_BINARY_DIR}/mach_excServer.c
@@ -318,4 +255,4 @@ if(build_and_sign_debugserver)
         ${entitlements}
       )
   endif()
-endif()
+#endif()
diff --git a/lldb/unittests/CMakeLists.txt b/lldb/unittests/CMakeLists.txt
index 311f47b1b0ec7..8e6b3e7b341a0 100644
--- a/lldb/unittests/CMakeLists.txt
+++ b/lldb/unittests/CMakeLists.txt
@@ -78,6 +78,6 @@ add_subdirectory(tools)
 add_subdirectory(UnwindAssembly)
 add_subdirectory(Utility)
 
-if(LLDB_CAN_USE_DEBUGSERVER AND NOT SKIP_TEST_DEBUGSERVER)
+if(LLDB_CAN_USE_DEBUGSERVER AND NOT LLDB_USE_SYSTEM_DEBUGSERVER)
   add_subdirectory(debugserver)
 endif()
diff --git a/lldb/unittests/tools/lldb-server/CMakeLists.txt b/lldb/unittests/tools/lldb-server/CMakeLists.txt
index 3bae69bfa4a12..7d52181f44b90 100644
--- a/lldb/unittests/tools/lldb-server/CMakeLists.txt
+++ b/lldb/unittests/tools/lldb-server/CMakeLists.txt
@@ -13,8 +13,13 @@ endfunction()
 add_lldb_test_executable(thread_inferior inferior/thread_inferior.cpp)
 add_lldb_test_executable(environment_check inferior/environment_check.cpp)
 
-if(DEBUGSERVER_PATH)
-  add_definitions(-DLLDB_SERVER="${DEBUGSERVER_PATH}" -DLLDB_SERVER_IS_DEBUGSERVER=1)
+if(LLDB_CAN_USE_DEBUGSERVER)
+  if(LLDB_USE_SYSTEM_DEBUGSERVER)
+    lldb_find_system_debugserver(debugserver_path)
+  else()
+    set(debugserver_path $<TARGET_FILE:debugserver>)
+  endif()
+  add_definitions(-DLLDB_SERVER="${debugserver_path}" -DLLDB_SERVER_IS_DEBUGSERVER=1)
 else()
   add_definitions(-DLLDB_SERVER="$<TARGET_FILE:lldb-server>" -DLLDB_SERVER_IS_DEBUGSERVER=0)
 endif()

From b51b64e4f4bdac842195621745c0176c2dcd050f Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Fri, 19 Jul 2019 09:43:12 +0000
Subject: [PATCH 450/451] Merging r366480:
 ------------------------------------------------------------------------
 r366480 | asb | 2019-07-18 20:29:59 +0200 (Thu, 18 Jul 2019) | 25 lines

[RISCV] Hard float ABI support

The RISC-V hard float calling convention requires the frontend to:

* Detect cases where, once "flattened", a struct can be passed using
int+fp or fp+fp registers under the hard float ABI and coerce to the
appropriate type(s)
* Track usage of GPRs and FPRs in order to gate the above, and to
determine when signext/zeroext attributes must be added to integer
scalars

This patch attempts to do this in compliance with the documented ABI,
and uses ABIArgInfo::CoerceAndExpand in order to do this. @rjmccall, as
author of that code I've tagged you as reviewer for initial feedback on
my usage.

Note that a previous version of the ABI indicated that when passing an
int+fp struct using a GPR+FPR, the int would need to be sign or
zero-extended appropriately. GCC never did this and the ABI was changed,
which makes life easier as ABIArgInfo::CoerceAndExpand can't currently
handle sign/zero-extension attributes.

Re-landed after backing out 366450 due to missed hunks.

Differential Revision: https://reviews.llvm.org/D60456
------------------------------------------------------------------------

llvm-svn: 366554
---
 clang/lib/Basic/Targets/RISCV.cpp             |  13 +-
 clang/lib/Basic/Targets/RISCV.h               |   6 +-
 clang/lib/CodeGen/TargetInfo.cpp              | 289 +++++++++++++++++-
 clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c |   6 +-
 .../CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c |   4 +
 clang/test/CodeGen/riscv32-ilp32d-abi.c       | 282 +++++++++++++++++
 clang/test/CodeGen/riscv32-ilp32f-abi.c       |  45 +++
 .../test/CodeGen/riscv32-ilp32f-ilp32d-abi.c  | 275 +++++++++++++++++
 clang/test/CodeGen/riscv64-lp64-lp64f-abi.c   |   2 +
 .../CodeGen/riscv64-lp64-lp64f-lp64d-abi.c    |   4 +
 clang/test/CodeGen/riscv64-lp64d-abi.c        | 272 +++++++++++++++++
 clang/test/CodeGen/riscv64-lp64f-lp64d-abi.c  | 265 ++++++++++++++++
 clang/test/Driver/riscv-abi.c                 |  20 +-
 .../test/Preprocessor/riscv-target-features.c |  24 ++
 14 files changed, 1474 insertions(+), 33 deletions(-)
 create mode 100644 clang/test/CodeGen/riscv32-ilp32d-abi.c
 create mode 100644 clang/test/CodeGen/riscv32-ilp32f-abi.c
 create mode 100644 clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c
 create mode 100644 clang/test/CodeGen/riscv64-lp64d-abi.c
 create mode 100644 clang/test/CodeGen/riscv64-lp64f-lp64d-abi.c

diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index f800bb0b25dac..58272d14abd1f 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -65,9 +65,18 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__riscv");
   bool Is64Bit = getTriple().getArch() == llvm::Triple::riscv64;
   Builder.defineMacro("__riscv_xlen", Is64Bit ? "64" : "32");
-  // TODO: modify when more code models and ABIs are supported.
+  // TODO: modify when more code models are supported.
   Builder.defineMacro("__riscv_cmodel_medlow");
-  Builder.defineMacro("__riscv_float_abi_soft");
+
+  StringRef ABIName = getABI();
+  if (ABIName == "ilp32f" || ABIName == "lp64f")
+    Builder.defineMacro("__riscv_float_abi_single");
+  else if (ABIName == "ilp32d" || ABIName == "lp64d")
+    Builder.defineMacro("__riscv_float_abi_double");
+  else if (ABIName == "ilp32e")
+    Builder.defineMacro("__riscv_abi_rve");
+  else
+    Builder.defineMacro("__riscv_float_abi_soft");
 
   if (HasM) {
     Builder.defineMacro("__riscv_mul");
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index bc814b79ce516..ce193feaeb980 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -87,8 +87,7 @@ class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo {
   }
 
   bool setABI(const std::string &Name) override {
-    // TODO: support ilp32f and ilp32d ABIs.
-    if (Name == "ilp32") {
+    if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") {
       ABI = Name;
       return true;
     }
@@ -105,8 +104,7 @@ class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
   }
 
   bool setABI(const std::string &Name) override {
-    // TODO: support lp64f and lp64d ABIs.
-    if (Name == "lp64") {
+    if (Name == "lp64" || Name == "lp64f" || Name == "lp64d") {
       ABI = Name;
       return true;
     }
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 5da988fb8a3c5..1e1038dbfe953 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -9188,25 +9188,45 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
 namespace {
 class RISCVABIInfo : public DefaultABIInfo {
 private:
-  unsigned XLen; // Size of the integer ('x') registers in bits.
+  // Size of the integer ('x') registers in bits.
+  unsigned XLen;
+  // Size of the floating point ('f') registers in bits. Note that the target
+  // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target
+  // with soft float ABI has FLen==0).
+  unsigned FLen;
   static const int NumArgGPRs = 8;
+  static const int NumArgFPRs = 8;
+  bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
+                                      llvm::Type *&Field1Ty,
+                                      CharUnits &Field1Off,
+                                      llvm::Type *&Field2Ty,
+                                      CharUnits &Field2Off) const;
 
 public:
-  RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
-      : DefaultABIInfo(CGT), XLen(XLen) {}
+  RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen)
+      : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {}
 
   // DefaultABIInfo's classifyReturnType and classifyArgumentType are
   // non-virtual, but computeInfo is virtual, so we overload it.
   void computeInfo(CGFunctionInfo &FI) const override;
 
-  ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed,
-                                  int &ArgGPRsLeft) const;
+  ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft,
+                                  int &ArgFPRsLeft) const;
   ABIArgInfo classifyReturnType(QualType RetTy) const;
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override;
 
   ABIArgInfo extendType(QualType Ty) const;
+
+  bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
+                                CharUnits &Field1Off, llvm::Type *&Field2Ty,
+                                CharUnits &Field2Off, int &NeededArgGPRs,
+                                int &NeededArgFPRs) const;
+  ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty,
+                                               CharUnits Field1Off,
+                                               llvm::Type *Field2Ty,
+                                               CharUnits Field2Off) const;
 };
 } // end anonymous namespace
 
@@ -9228,18 +9248,215 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
   // different for variadic arguments, we must also track whether we are
   // examining a vararg or not.
   int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
+  int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
   int NumFixedArgs = FI.getNumRequiredArgs();
 
   int ArgNum = 0;
   for (auto &ArgInfo : FI.arguments()) {
     bool IsFixed = ArgNum < NumFixedArgs;
-    ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft);
+    ArgInfo.info =
+        classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft);
     ArgNum++;
   }
 }
 
+// Returns true if the struct is a potential candidate for the floating point
+// calling convention. If this function returns true, the caller is
+// responsible for checking that if there is only a single field then that
+// field is a float.
+bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
+                                                  llvm::Type *&Field1Ty,
+                                                  CharUnits &Field1Off,
+                                                  llvm::Type *&Field2Ty,
+                                                  CharUnits &Field2Off) const {
+  bool IsInt = Ty->isIntegralOrEnumerationType();
+  bool IsFloat = Ty->isRealFloatingType();
+
+  if (IsInt || IsFloat) {
+    uint64_t Size = getContext().getTypeSize(Ty);
+    if (IsInt && Size > XLen)
+      return false;
+    // Can't be eligible if larger than the FP registers. Half precision isn't
+    // currently supported on RISC-V and the ABI hasn't been confirmed, so
+    // default to the integer ABI in that case.
+    if (IsFloat && (Size > FLen || Size < 32))
+      return false;
+    // Can't be eligible if an integer type was already found (int+int pairs
+    // are not eligible).
+    if (IsInt && Field1Ty && Field1Ty->isIntegerTy())
+      return false;
+    if (!Field1Ty) {
+      Field1Ty = CGT.ConvertType(Ty);
+      Field1Off = CurOff;
+      return true;
+    }
+    if (!Field2Ty) {
+      Field2Ty = CGT.ConvertType(Ty);
+      Field2Off = CurOff;
+      return true;
+    }
+    return false;
+  }
+
+  if (auto CTy = Ty->getAs<ComplexType>()) {
+    if (Field1Ty)
+      return false;
+    QualType EltTy = CTy->getElementType();
+    if (getContext().getTypeSize(EltTy) > FLen)
+      return false;
+    Field1Ty = CGT.ConvertType(EltTy);
+    Field1Off = CurOff;
+    assert(CurOff.isZero() && "Unexpected offset for first field");
+    Field2Ty = Field1Ty;
+    Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy);
+    return true;
+  }
+
+  if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
+    uint64_t ArraySize = ATy->getSize().getZExtValue();
+    QualType EltTy = ATy->getElementType();
+    CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
+    for (uint64_t i = 0; i < ArraySize; ++i) {
+      bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty,
+                                                Field1Off, Field2Ty, Field2Off);
+      if (!Ret)
+        return false;
+      CurOff += EltSize;
+    }
+    return true;
+  }
+
+  if (const auto *RTy = Ty->getAs<RecordType>()) {
+    // Structures with either a non-trivial destructor or a non-trivial
+    // copy constructor are not eligible for the FP calling convention.
+    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, CGT.getCXXABI()))
+      return false;
+    if (isEmptyRecord(getContext(), Ty, true))
+      return true;
+    const RecordDecl *RD = RTy->getDecl();
+    // Unions aren't eligible unless they're empty (which is caught above).
+    if (RD->isUnion())
+      return false;
+    int ZeroWidthBitFieldCount = 0;
+    for (const FieldDecl *FD : RD->fields()) {
+      const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+      uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex());
+      QualType QTy = FD->getType();
+      if (FD->isBitField()) {
+        unsigned BitWidth = FD->getBitWidthValue(getContext());
+        // Allow a bitfield with a type greater than XLen as long as the
+        // bitwidth is XLen or less.
+        if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen)
+          QTy = getContext().getIntTypeForBitwidth(XLen, false);
+        if (BitWidth == 0) {
+          ZeroWidthBitFieldCount++;
+          continue;
+        }
+      }
+
+      bool Ret = detectFPCCEligibleStructHelper(
+          QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits),
+          Field1Ty, Field1Off, Field2Ty, Field2Off);
+      if (!Ret)
+        return false;
+
+      // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp
+      // or int+fp structs, but are ignored for a struct with an fp field and
+      // any number of zero-width bitfields.
+      if (Field2Ty && ZeroWidthBitFieldCount > 0)
+        return false;
+    }
+    return Field1Ty != nullptr;
+  }
+
+  return false;
+}
+
+// Determine if a struct is eligible for passing according to the floating
+// point calling convention (i.e., when flattened it contains a single fp
+// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and
+// NeededArgGPRs are incremented appropriately.
+bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
+                                            CharUnits &Field1Off,
+                                            llvm::Type *&Field2Ty,
+                                            CharUnits &Field2Off,
+                                            int &NeededArgGPRs,
+                                            int &NeededArgFPRs) const {
+  Field1Ty = nullptr;
+  Field2Ty = nullptr;
+  NeededArgGPRs = 0;
+  NeededArgFPRs = 0;
+  bool IsCandidate = detectFPCCEligibleStructHelper(
+      Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off);
+  // Not really a candidate if we have a single int but no float.
+  if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
+    return IsCandidate = false;
+  if (!IsCandidate)
+    return false;
+  if (Field1Ty && Field1Ty->isFloatingPointTy())
+    NeededArgFPRs++;
+  else if (Field1Ty)
+    NeededArgGPRs++;
+  if (Field2Ty && Field2Ty->isFloatingPointTy())
+    NeededArgFPRs++;
+  else if (Field2Ty)
+    NeededArgGPRs++;
+  return IsCandidate;
+}
+
+// Call getCoerceAndExpand for the two-element flattened struct described by
+// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an
+// appropriate coerceToType and unpaddedCoerceToType.
+ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct(
+    llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty,
+    CharUnits Field2Off) const {
+  SmallVector<llvm::Type *, 3> CoerceElts;
+  SmallVector<llvm::Type *, 2> UnpaddedCoerceElts;
+  if (!Field1Off.isZero())
+    CoerceElts.push_back(llvm::ArrayType::get(
+        llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity()));
+
+  CoerceElts.push_back(Field1Ty);
+  UnpaddedCoerceElts.push_back(Field1Ty);
+
+  if (!Field2Ty) {
+    return ABIArgInfo::getCoerceAndExpand(
+        llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()),
+        UnpaddedCoerceElts[0]);
+  }
+
+  CharUnits Field2Align =
+      CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty));
+  CharUnits Field1Size =
+      CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
+  CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align);
+
+  CharUnits Padding = CharUnits::Zero();
+  if (Field2Off > Field2OffNoPadNoPack)
+    Padding = Field2Off - Field2OffNoPadNoPack;
+  else if (Field2Off != Field2Align && Field2Off > Field1Size)
+    Padding = Field2Off - Field1Size;
+
+  bool IsPacked = !Field2Off.isMultipleOf(Field2Align);
+
+  if (!Padding.isZero())
+    CoerceElts.push_back(llvm::ArrayType::get(
+        llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity()));
+
+  CoerceElts.push_back(Field2Ty);
+  UnpaddedCoerceElts.push_back(Field2Ty);
+
+  auto CoerceToType =
+      llvm::StructType::get(getVMContext(), CoerceElts, IsPacked);
+  auto UnpaddedCoerceToType =
+      llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked);
+
+  return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
+}
+
 ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
-                                              int &ArgGPRsLeft) const {
+                                              int &ArgGPRsLeft,
+                                              int &ArgFPRsLeft) const {
   assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
@@ -9257,6 +9474,42 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
     return ABIArgInfo::getIgnore();
 
   uint64_t Size = getContext().getTypeSize(Ty);
+
+  // Pass floating point values via FPRs if possible.
+  if (IsFixed && Ty->isFloatingType() && FLen >= Size && ArgFPRsLeft) {
+    ArgFPRsLeft--;
+    return ABIArgInfo::getDirect();
+  }
+
+  // Complex types for the hard float ABI must be passed direct rather than
+  // using CoerceAndExpand.
+  if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) {
+    QualType EltTy = Ty->getAs<ComplexType>()->getElementType();
+    if (getContext().getTypeSize(EltTy) <= FLen) {
+      ArgFPRsLeft -= 2;
+      return ABIArgInfo::getDirect();
+    }
+  }
+
+  if (IsFixed && FLen && Ty->isStructureOrClassType()) {
+    llvm::Type *Field1Ty = nullptr;
+    llvm::Type *Field2Ty = nullptr;
+    CharUnits Field1Off = CharUnits::Zero();
+    CharUnits Field2Off = CharUnits::Zero();
+    int NeededArgGPRs;
+    int NeededArgFPRs;
+    bool IsCandidate =
+        detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off,
+                                 NeededArgGPRs, NeededArgFPRs);
+    if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft &&
+        NeededArgFPRs <= ArgFPRsLeft) {
+      ArgGPRsLeft -= NeededArgGPRs;
+      ArgFPRsLeft -= NeededArgFPRs;
+      return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty,
+                                               Field2Off);
+    }
+  }
+
   uint64_t NeededAlign = getContext().getTypeAlign(Ty);
   bool MustUseStack = false;
   // Determine the number of GPRs needed to pass the current argument
@@ -9315,10 +9568,12 @@ ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const {
     return ABIArgInfo::getIgnore();
 
   int ArgGPRsLeft = 2;
+  int ArgFPRsLeft = FLen ? 2 : 0;
 
   // The rules for return and argument types are the same, so defer to
   // classifyArgumentType.
-  return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft);
+  return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft,
+                              ArgFPRsLeft);
 }
 
 Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -9353,8 +9608,9 @@ ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
 namespace {
 class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
-      : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {}
+  RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen,
+                         unsigned FLen)
+      : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen, FLen)) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override {
@@ -9493,9 +9749,16 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
     return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
 
   case llvm::Triple::riscv32:
-    return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32));
-  case llvm::Triple::riscv64:
-    return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64));
+  case llvm::Triple::riscv64: {
+    StringRef ABIStr = getTarget().getABI();
+    unsigned XLen = getTarget().getPointerWidth(0);
+    unsigned ABIFLen = 0;
+    if (ABIStr.endswith("f"))
+      ABIFLen = 32;
+    else if (ABIStr.endswith("d"))
+      ABIFLen = 64;
+    return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen));
+  }
 
   case llvm::Triple::systemz: {
     bool HasVector = getTarget().getABI() == "vector";
diff --git a/clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c b/clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
index 0c2f0791e316b..677040626f578 100644
--- a/clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
+++ b/clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-abi ilp32f -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
 
 // This file contains test cases that will have the same output for the ilp32
 // and ilp32f ABIs.
@@ -35,8 +37,8 @@ int f_scalar_stack_1(int32_t a, int64_t b, int32_t c, double d, long double e,
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
-struct large f_scalar_stack_2(int32_t a, int64_t b, int64_t c, long double d,
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+struct large f_scalar_stack_2(int32_t a, int64_t b, double c, long double d,
                               uint8_t e, int8_t f, uint8_t g) {
   return (struct large){a, e, f, g};
 }
diff --git a/clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c b/clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
index 12837fce9422f..fa11c1772d726 100644
--- a/clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
+++ b/clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -triple riscv32 -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -triple riscv32 -emit-llvm -fforce-enable-int128 %s -o - \
 // RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-FORCEINT128
+// RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-abi ilp32f -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+// RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-abi ilp32d -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
 
 // This file contains test cases that will have the same output for the ilp32,
 // ilp32f, and ilp32d ABIs.
diff --git a/clang/test/CodeGen/riscv32-ilp32d-abi.c b/clang/test/CodeGen/riscv32-ilp32d-abi.c
new file mode 100644
index 0000000000000..b10656cf123e4
--- /dev/null
+++ b/clang/test/CodeGen/riscv32-ilp32d-abi.c
@@ -0,0 +1,282 @@
+// RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-abi ilp32d -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+
+#include <stdint.h>
+
+// Verify that the tracking of used GPRs and FPRs works correctly by checking
+// that small integers are sign/zero extended when passed in registers.
+
+// Doubles are passed in FPRs, so argument 'i' will be passed zero-extended
+// because it will be passed in a GPR.
+
+// CHECK: define void @f_fpr_tracking(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, i8 zeroext %i)
+void f_fpr_tracking(double a, double b, double c, double d, double e, double f,
+                    double g, double h, uint8_t i) {}
+
+// Check that fp, fp+fp, and int+fp structs are lowered correctly. These will
+// be passed in FPR, FPR+FPR, or GPR+FPR regs if sufficient registers are
+// available and the widths are <= XLEN and FLEN, and should be expanded to
+// separate arguments in IR. They are passed by the same rules for returns,
+// but will be lowered to simple two-element structs if necessary (as LLVM IR
+// functions cannot return multiple values).
+
+// A struct containing just one floating-point real is passed as though it
+// were a standalone floating-point real.
+
+struct double_s { double f; };
+
+// CHECK: define void @f_double_s_arg(double)
+void f_double_s_arg(struct double_s a) {}
+
+// CHECK: define double @f_ret_double_s()
+struct double_s f_ret_double_s() {
+  return (struct double_s){1.0};
+}
+
+// A struct containing a double and any number of zero-width bitfields is
+// passed as though it were a standalone floating-point real.
+
+struct zbf_double_s { int : 0; double f; };
+struct zbf_double_zbf_s { int : 0; double f; int : 0; };
+
+// CHECK: define void @f_zbf_double_s_arg(double)
+void f_zbf_double_s_arg(struct zbf_double_s a) {}
+
+// CHECK: define double @f_ret_zbf_double_s()
+struct zbf_double_s f_ret_zbf_double_s() {
+  return (struct zbf_double_s){1.0};
+}
+
+// CHECK: define void @f_zbf_double_zbf_s_arg(double)
+void f_zbf_double_zbf_s_arg(struct zbf_double_zbf_s a) {}
+
+// CHECK: define double @f_ret_zbf_double_zbf_s()
+struct zbf_double_zbf_s f_ret_zbf_double_zbf_s() {
+  return (struct zbf_double_zbf_s){1.0};
+}
+
+// Check that structs containing two floating point values (width <= FLEN) are
+// expanded provided sufficient FPRs are available.
+
+struct double_double_s { double f; double g; };
+struct double_float_s { double f; float g; };
+
+// CHECK: define void @f_double_double_s_arg(double, double)
+void f_double_double_s_arg(struct double_double_s a) {}
+
+// CHECK: define { double, double } @f_ret_double_double_s()
+struct double_double_s f_ret_double_double_s() {
+  return (struct double_double_s){1.0, 2.0};
+}
+
+// CHECK: define void @f_double_float_s_arg(double, float)
+void f_double_float_s_arg(struct double_float_s a) {}
+
+// CHECK: define { double, float } @f_ret_double_float_s()
+struct double_float_s f_ret_double_float_s() {
+  return (struct double_float_s){1.0, 2.0};
+}
+
+// CHECK: define void @f_double_double_s_arg_insufficient_fprs(float %a, double %b, double %c, double %d, double %e, double %f, double %g, %struct.double_double_s* %h)
+void f_double_double_s_arg_insufficient_fprs(float a, double b, double c, double d,
+    double e, double f, double g, struct double_double_s h) {}
+
+// Check that structs containing int+double values are expanded, provided
+// sufficient FPRs and GPRs are available. The integer components are neither
+// sign- nor zero-extended.
+
+struct double_int8_s { double f; int8_t i; };
+struct double_uint8_s { double f; uint8_t i; };
+struct double_int32_s { double f; int32_t i; };
+struct double_int64_s { double f; int64_t i; };
+struct double_int64bf_s { double f; int64_t i : 32; };
+struct double_int8_zbf_s { double f; int8_t i; int : 0; };
+
+// CHECK: define void @f_double_int8_s_arg(double, i8)
+void f_double_int8_s_arg(struct double_int8_s a) {}
+
+// CHECK: define { double, i8 } @f_ret_double_int8_s()
+struct double_int8_s f_ret_double_int8_s() {
+  return (struct double_int8_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_uint8_s_arg(double, i8)
+void f_double_uint8_s_arg(struct double_uint8_s a) {}
+
+// CHECK: define { double, i8 } @f_ret_double_uint8_s()
+struct double_uint8_s f_ret_double_uint8_s() {
+  return (struct double_uint8_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_int32_s_arg(double, i32)
+void f_double_int32_s_arg(struct double_int32_s a) {}
+
+// CHECK: define { double, i32 } @f_ret_double_int32_s()
+struct double_int32_s f_ret_double_int32_s() {
+  return (struct double_int32_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_int64_s_arg(%struct.double_int64_s* %a)
+void f_double_int64_s_arg(struct double_int64_s a) {}
+
+// CHECK: define void @f_ret_double_int64_s(%struct.double_int64_s* noalias sret %agg.result)
+struct double_int64_s f_ret_double_int64_s() {
+  return (struct double_int64_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_int64bf_s_arg(double, i32)
+void f_double_int64bf_s_arg(struct double_int64bf_s a) {}
+
+// CHECK: define { double, i32 } @f_ret_double_int64bf_s()
+struct double_int64bf_s f_ret_double_int64bf_s() {
+  return (struct double_int64bf_s){1.0, 2};
+}
+
+// A trailing zero-width bitfield does not stop the struct from being passed
+// according to the floating point calling convention (see the checks below).
+
+// CHECK: define void @f_double_int8_zbf_s(double, i8)
+void f_double_int8_zbf_s(struct double_int8_zbf_s a) {}
+
+// CHECK: define { double, i8 } @f_ret_double_int8_zbf_s()
+struct double_int8_zbf_s f_ret_double_int8_zbf_s() {
+  return (struct double_int8_zbf_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_int8_s_arg_insufficient_gprs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, %struct.double_int8_s* %i)
+void f_double_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e,
+                                          int f, int g, int h, struct double_int8_s i) {}
+
+// CHECK: define void @f_struct_double_int8_insufficient_fprs(float %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, %struct.double_int8_s* %i)
+void f_struct_double_int8_insufficient_fprs(float a, double b, double c, double d,
+                                           double e, double f, double g, double h, struct double_int8_s i) {}
+
+// Complex floating-point values or structs containing a single complex
+// floating-point value should be passed as if it were an fp+fp struct.
+
+// CHECK: define void @f_doublecomplex(double %a.coerce0, double %a.coerce1)
+void f_doublecomplex(double __complex__ a) {}
+
+// CHECK: define { double, double } @f_ret_doublecomplex()
+double __complex__ f_ret_doublecomplex() {
+  return 1.0;
+}
+
+struct doublecomplex_s { double __complex__ c; };
+
+// CHECK: define void @f_doublecomplex_s_arg(double, double)
+void f_doublecomplex_s_arg(struct doublecomplex_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublecomplex_s()
+struct doublecomplex_s f_ret_doublecomplex_s() {
+  return (struct doublecomplex_s){1.0};
+}
+
+// Test single or two-element structs that need flattening. e.g. those
+// containing nested structs, doubles in small arrays, zero-length structs etc.
+
+struct doublearr1_s { double a[1]; };
+
+// CHECK: define void @f_doublearr1_s_arg(double)
+void f_doublearr1_s_arg(struct doublearr1_s a) {}
+
+// CHECK: define double @f_ret_doublearr1_s()
+struct doublearr1_s f_ret_doublearr1_s() {
+  return (struct doublearr1_s){{1.0}};
+}
+
+struct doublearr2_s { double a[2]; };
+
+// CHECK: define void @f_doublearr2_s_arg(double, double)
+void f_doublearr2_s_arg(struct doublearr2_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_s()
+struct doublearr2_s f_ret_doublearr2_s() {
+  return (struct doublearr2_s){{1.0, 2.0}};
+}
+
+struct doublearr2_tricky1_s { struct { double f[1]; } g[2]; };
+
+// CHECK: define void @f_doublearr2_tricky1_s_arg(double, double)
+void f_doublearr2_tricky1_s_arg(struct doublearr2_tricky1_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_tricky1_s()
+struct doublearr2_tricky1_s f_ret_doublearr2_tricky1_s() {
+  return (struct doublearr2_tricky1_s){{{{1.0}}, {{2.0}}}};
+}
+
+struct doublearr2_tricky2_s { struct {}; struct { double f[1]; } g[2]; };
+
+// CHECK: define void @f_doublearr2_tricky2_s_arg(double, double)
+void f_doublearr2_tricky2_s_arg(struct doublearr2_tricky2_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_tricky2_s()
+struct doublearr2_tricky2_s f_ret_doublearr2_tricky2_s() {
+  return (struct doublearr2_tricky2_s){{}, {{{1.0}}, {{2.0}}}};
+}
+
+struct doublearr2_tricky3_s { union {}; struct { double f[1]; } g[2]; };
+
+// CHECK: define void @f_doublearr2_tricky3_s_arg(double, double)
+void f_doublearr2_tricky3_s_arg(struct doublearr2_tricky3_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_tricky3_s()
+struct doublearr2_tricky3_s f_ret_doublearr2_tricky3_s() {
+  return (struct doublearr2_tricky3_s){{}, {{{1.0}}, {{2.0}}}};
+}
+
+struct doublearr2_tricky4_s { union {}; struct { struct {}; double f[1]; } g[2]; };
+
+// CHECK: define void @f_doublearr2_tricky4_s_arg(double, double)
+void f_doublearr2_tricky4_s_arg(struct doublearr2_tricky4_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_tricky4_s()
+struct doublearr2_tricky4_s f_ret_doublearr2_tricky4_s() {
+  return (struct doublearr2_tricky4_s){{}, {{{}, {1.0}}, {{}, {2.0}}}};
+}
+
+// Test structs that should be passed according to the normal integer calling
+// convention.
+
+struct int_double_int_s { int a; double b; int c; };
+
+// CHECK: define void @f_int_double_int_s_arg(%struct.int_double_int_s* %a)
+void f_int_double_int_s_arg(struct int_double_int_s a) {}
+
+// CHECK: define void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret %agg.result)
+struct int_double_int_s f_ret_int_double_int_s() {
+  return (struct int_double_int_s){1, 2.0, 3};
+}
+
+struct int64_double_s { int64_t a; double b; };
+
+// CHECK: define void @f_int64_double_s_arg(%struct.int64_double_s* %a)
+void f_int64_double_s_arg(struct int64_double_s a) {}
+
+// CHECK: define void @f_ret_int64_double_s(%struct.int64_double_s* noalias sret %agg.result)
+struct int64_double_s f_ret_int64_double_s() {
+  return (struct int64_double_s){1, 2.0};
+}
+
+struct char_char_double_s { char a; char b; double c; };
+
+// CHECK-LABEL: define void @f_char_char_double_s_arg(%struct.char_char_double_s* %a)
+void f_char_char_double_s_arg(struct char_char_double_s a) {}
+
+// CHECK: define void @f_ret_char_char_double_s(%struct.char_char_double_s* noalias sret %agg.result)
+struct char_char_double_s f_ret_char_char_double_s() {
+  return (struct char_char_double_s){1, 2, 3.0};
+}
+
+// Unions are always passed according to the integer calling convention, even
+// if they can only contain a double.
+
+union double_u { double a; };
+
+// CHECK: define void @f_double_u_arg(i64 %a.coerce)
+void f_double_u_arg(union double_u a) {}
+
+// CHECK: define i64 @f_ret_double_u()
+union double_u f_ret_double_u() {
+  return (union double_u){1.0};
+}
diff --git a/clang/test/CodeGen/riscv32-ilp32f-abi.c b/clang/test/CodeGen/riscv32-ilp32f-abi.c
new file mode 100644
index 0000000000000..76092958aeddf
--- /dev/null
+++ b/clang/test/CodeGen/riscv32-ilp32f-abi.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-abi ilp32f -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+
+#include <stdint.h>
+
+// Doubles are still passed in GPRs, so the 'e' argument will be anyext as
+// GPRs are exhausted.
+
+// CHECK: define void @f_fpr_tracking(double %a, double %b, double %c, double %d, i8 %e)
+void f_fpr_tracking(double a, double b, double c, double d, int8_t e) {}
+
+// Lowering for doubles is unmodified, as 64 > FLEN.
+
+struct double_s { double d; };
+
+// CHECK: define void @f_double_s_arg(i64 %a.coerce)
+void f_double_s_arg(struct double_s a) {}
+
+// CHECK: define i64 @f_ret_double_s()
+struct double_s f_ret_double_s() {
+  return (struct double_s){1.0};
+}
+
+struct double_double_s { double d; double e; };
+
+// CHECK: define void @f_double_double_s_arg(%struct.double_double_s* %a)
+void f_double_double_s_arg(struct double_double_s a) {}
+
+// CHECK: define void @f_ret_double_double_s(%struct.double_double_s* noalias sret %agg.result)
+struct double_double_s f_ret_double_double_s() {
+  return (struct double_double_s){1.0, 2.0};
+}
+
+struct double_int8_s { double d; int64_t i; };
+
+struct int_double_s { int a; double b; };
+
+// CHECK: define void @f_int_double_s_arg(%struct.int_double_s* %a)
+void f_int_double_s_arg(struct int_double_s a) {}
+
+// CHECK: define void @f_ret_int_double_s(%struct.int_double_s* noalias sret %agg.result)
+struct int_double_s f_ret_int_double_s() {
+  return (struct int_double_s){1, 2.0};
+}
+
diff --git a/clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c b/clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c
new file mode 100644
index 0000000000000..b960513655b11
--- /dev/null
+++ b/clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c
@@ -0,0 +1,275 @@
+// RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-abi ilp32f -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+// RUN: %clang_cc1 -triple riscv32 -target-feature +d -target-abi ilp32d -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+
+#include <stdint.h>
+
+// Verify that the tracking of used GPRs and FPRs works correctly by checking
+// that small integers are sign/zero extended when passed in registers.
+
+// Floats are passed in FPRs, so argument 'i' will be passed zero-extended 
+// because it will be passed in a GPR.
+
+// CHECK: define void @f_fpr_tracking(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, i8 zeroext %i)
+void f_fpr_tracking(float a, float b, float c, float d, float e, float f,
+                    float g, float h, uint8_t i) {}
+
+// Check that fp, fp+fp, and int+fp structs are lowered correctly. These will 
+// be passed in FPR, FPR+FPR, or GPR+FPR regs if sufficient registers are 
+// available and the widths are <= XLEN and FLEN, and should be expanded to
+// separate arguments in IR. They are passed by the same rules for returns,
+// but will be lowered to simple two-element structs if necessary (as LLVM IR
+// functions cannot return multiple values).
+
+// A struct containing just one floating-point real is passed as though it 
+// were a standalone floating-point real.
+
+struct float_s { float f; };
+
+// CHECK: define void @f_float_s_arg(float)
+void f_float_s_arg(struct float_s a) {}
+
+// CHECK: define float @f_ret_float_s()
+struct float_s f_ret_float_s() {
+  return (struct float_s){1.0};
+}
+
+// A struct containing a float and any number of zero-width bitfields is
+// passed as though it were a standalone floating-point real.
+
+struct zbf_float_s { int : 0; float f; };
+struct zbf_float_zbf_s { int : 0; float f; int : 0; };
+
+// CHECK: define void @f_zbf_float_s_arg(float)
+void f_zbf_float_s_arg(struct zbf_float_s a) {}
+
+// CHECK: define float @f_ret_zbf_float_s()
+struct zbf_float_s f_ret_zbf_float_s() {
+  return (struct zbf_float_s){1.0};
+}
+
+// CHECK: define void @f_zbf_float_zbf_s_arg(float)
+void f_zbf_float_zbf_s_arg(struct zbf_float_zbf_s a) {}
+
+// CHECK: define float @f_ret_zbf_float_zbf_s()
+struct zbf_float_zbf_s f_ret_zbf_float_zbf_s() {
+  return (struct zbf_float_zbf_s){1.0};
+}
+
+// Check that structs containing two float values (width <= FLEN) are expanded
+// provided sufficient FPRs are available.
+
+struct float_float_s { float f; float g; };
+
+// CHECK: define void @f_float_float_s_arg(float, float)
+void f_float_float_s_arg(struct float_float_s a) {}
+
+// CHECK: define { float, float } @f_ret_float_float_s()
+struct float_float_s f_ret_float_float_s() {
+  return (struct float_float_s){1.0, 2.0};
+}
+
+// CHECK: define void @f_float_float_s_arg_insufficient_fprs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, [2 x i32] %h.coerce)
+void f_float_float_s_arg_insufficient_fprs(float a, float b, float c, float d, 
+    float e, float f, float g, struct float_float_s h) {}
+
+// Check that structs containing int+float values are expanded, provided
+// sufficient FPRs and GPRs are available. The integer components are neither
+// sign- nor zero-extended.
+
+struct float_int8_s { float f; int8_t i; };
+struct float_uint8_s { float f; uint8_t i; };
+struct float_int32_s { float f; int32_t i; };
+struct float_int64_s { float f; int64_t i; };
+struct float_int64bf_s { float f; int64_t i : 32; };
+struct float_int8_zbf_s { float f; int8_t i; int : 0; };
+
+// CHECK: define void @f_float_int8_s_arg(float, i8)
+void f_float_int8_s_arg(struct float_int8_s a) {}
+
+// CHECK: define { float, i8 } @f_ret_float_int8_s()
+struct float_int8_s f_ret_float_int8_s() {
+  return (struct float_int8_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_uint8_s_arg(float, i8)
+void f_float_uint8_s_arg(struct float_uint8_s a) {}
+
+// CHECK: define { float, i8 } @f_ret_float_uint8_s()
+struct float_uint8_s f_ret_float_uint8_s() {
+  return (struct float_uint8_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_int32_s_arg(float, i32)
+void f_float_int32_s_arg(struct float_int32_s a) {}
+
+// CHECK: define { float, i32 } @f_ret_float_int32_s()
+struct float_int32_s f_ret_float_int32_s() {
+  return (struct float_int32_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_int64_s_arg(%struct.float_int64_s* %a)
+void f_float_int64_s_arg(struct float_int64_s a) {}
+
+// CHECK: define void @f_ret_float_int64_s(%struct.float_int64_s* noalias sret %agg.result)
+struct float_int64_s f_ret_float_int64_s() {
+  return (struct float_int64_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_int64bf_s_arg(float, i32)
+void f_float_int64bf_s_arg(struct float_int64bf_s a) {}
+
+// CHECK: define { float, i32 } @f_ret_float_int64bf_s()
+struct float_int64bf_s f_ret_float_int64bf_s() {
+  return (struct float_int64bf_s){1.0, 2};
+}
+
+// The trailing zero-width bitfield does not change the lowering: the struct is
+// still expanded to float and i8, the same as float_int8_s.
+
+// CHECK: define void @f_float_int8_zbf_s(float, i8)
+void f_float_int8_zbf_s(struct float_int8_zbf_s a) {}
+
+// CHECK: define { float, i8 } @f_ret_float_int8_zbf_s()
+struct float_int8_zbf_s f_ret_float_int8_zbf_s() {
+  return (struct float_int8_zbf_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_int8_s_arg_insufficient_gprs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, [2 x i32] %i.coerce)
+void f_float_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e,
+                                          int f, int g, int h, struct float_int8_s i) {}
+
+// CHECK: define void @f_struct_float_int8_insufficient_fprs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, [2 x i32] %i.coerce)
+void f_struct_float_int8_insufficient_fprs(float a, float b, float c, float d,
+                                           float e, float f, float g, float h, struct float_int8_s i) {}
+
+// Complex floating-point values or structs containing a single complex 
+// floating-point value should be passed as if it were an fp+fp struct.
+
+// CHECK: define void @f_floatcomplex(float %a.coerce0, float %a.coerce1)
+void f_floatcomplex(float __complex__ a) {}
+
+// CHECK: define { float, float } @f_ret_floatcomplex()
+float __complex__ f_ret_floatcomplex() {
+  return 1.0;
+}
+
+struct floatcomplex_s { float __complex__ c; };
+
+// CHECK: define void @f_floatcomplex_s_arg(float, float)
+void f_floatcomplex_s_arg(struct floatcomplex_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatcomplex_s()
+struct floatcomplex_s f_ret_floatcomplex_s() {
+  return (struct floatcomplex_s){1.0};
+}
+
+// Test single or two-element structs that need flattening. e.g. those 
+// containing nested structs, floats in small arrays, zero-length structs etc.
+
+struct floatarr1_s { float a[1]; };
+
+// CHECK: define void @f_floatarr1_s_arg(float)
+void f_floatarr1_s_arg(struct floatarr1_s a) {}
+
+// CHECK: define float @f_ret_floatarr1_s()
+struct floatarr1_s f_ret_floatarr1_s() {
+  return (struct floatarr1_s){{1.0}};
+}
+
+struct floatarr2_s { float a[2]; };
+
+// CHECK: define void @f_floatarr2_s_arg(float, float)
+void f_floatarr2_s_arg(struct floatarr2_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_s()
+struct floatarr2_s f_ret_floatarr2_s() {
+  return (struct floatarr2_s){{1.0, 2.0}};
+}
+
+struct floatarr2_tricky1_s { struct { float f[1]; } g[2]; };
+
+// CHECK: define void @f_floatarr2_tricky1_s_arg(float, float)
+void f_floatarr2_tricky1_s_arg(struct floatarr2_tricky1_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_tricky1_s()
+struct floatarr2_tricky1_s f_ret_floatarr2_tricky1_s() {
+  return (struct floatarr2_tricky1_s){{{{1.0}}, {{2.0}}}};
+}
+
+struct floatarr2_tricky2_s { struct {}; struct { float f[1]; } g[2]; };
+
+// CHECK: define void @f_floatarr2_tricky2_s_arg(float, float)
+void f_floatarr2_tricky2_s_arg(struct floatarr2_tricky2_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_tricky2_s()
+struct floatarr2_tricky2_s f_ret_floatarr2_tricky2_s() {
+  return (struct floatarr2_tricky2_s){{}, {{{1.0}}, {{2.0}}}};
+}
+
+struct floatarr2_tricky3_s { union {}; struct { float f[1]; } g[2]; };
+
+// CHECK: define void @f_floatarr2_tricky3_s_arg(float, float)
+void f_floatarr2_tricky3_s_arg(struct floatarr2_tricky3_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_tricky3_s()
+struct floatarr2_tricky3_s f_ret_floatarr2_tricky3_s() {
+  return (struct floatarr2_tricky3_s){{}, {{{1.0}}, {{2.0}}}};
+}
+
+struct floatarr2_tricky4_s { union {}; struct { struct {}; float f[1]; } g[2]; };
+
+// CHECK: define void @f_floatarr2_tricky4_s_arg(float, float)
+void f_floatarr2_tricky4_s_arg(struct floatarr2_tricky4_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_tricky4_s()
+struct floatarr2_tricky4_s f_ret_floatarr2_tricky4_s() {
+  return (struct floatarr2_tricky4_s){{}, {{{}, {1.0}}, {{}, {2.0}}}};
+}
+
+// Test structs that should be passed according to the normal integer calling
+// convention.
+
+struct int_float_int_s { int a; float b; int c; };
+
+// CHECK: define void @f_int_float_int_s_arg(%struct.int_float_int_s* %a)
+void f_int_float_int_s_arg(struct int_float_int_s a) {}
+
+// CHECK: define void @f_ret_int_float_int_s(%struct.int_float_int_s* noalias sret %agg.result)
+struct int_float_int_s f_ret_int_float_int_s() {
+  return (struct int_float_int_s){1, 2.0, 3};
+}
+
+struct int64_float_s { int64_t a; float b; };
+
+// CHECK: define void @f_int64_float_s_arg(%struct.int64_float_s* %a)
+void f_int64_float_s_arg(struct int64_float_s a) {}
+
+// CHECK: define void @f_ret_int64_float_s(%struct.int64_float_s* noalias sret %agg.result)
+struct int64_float_s f_ret_int64_float_s() {
+  return (struct int64_float_s){1, 2.0};
+}
+
+struct char_char_float_s { char a; char b; float c; };
+
+// CHECK-LABEL: define void @f_char_char_float_s_arg([2 x i32] %a.coerce)
+void f_char_char_float_s_arg(struct char_char_float_s a) {}
+
+// CHECK: define [2 x i32] @f_ret_char_char_float_s()
+struct char_char_float_s f_ret_char_char_float_s() {
+  return (struct char_char_float_s){1, 2, 3.0};
+}
+
+// Unions are always passed according to the integer calling convention, even 
+// if they can only contain a float.
+
+union float_u { float a; };
+
+// CHECK: define void @f_float_u_arg(i32 %a.coerce)
+void f_float_u_arg(union float_u a) {}
+
+// CHECK: define i32 @f_ret_float_u()
+union float_u f_ret_float_u() {
+  return (union float_u){1.0};
+}
diff --git a/clang/test/CodeGen/riscv64-lp64-lp64f-abi.c b/clang/test/CodeGen/riscv64-lp64-lp64f-abi.c
index 3b944e716a2a0..d457bdf3c64e3 100644
--- a/clang/test/CodeGen/riscv64-lp64-lp64f-abi.c
+++ b/clang/test/CodeGen/riscv64-lp64-lp64f-abi.c
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -triple riscv64 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-abi lp64f -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
 
 // This file contains test cases that will have the same output for the lp64
 // and lp64f ABIs.
diff --git a/clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c b/clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
index f51d8252b8f47..f3523702e9a29 100644
--- a/clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
+++ b/clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
@@ -1,4 +1,8 @@
 // RUN: %clang_cc1 -triple riscv64 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-abi lp64f -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +d -target-abi lp64d -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
 
 // This file contains test cases that will have the same output for the lp64,
 // lp64f, and lp64d ABIs.
diff --git a/clang/test/CodeGen/riscv64-lp64d-abi.c b/clang/test/CodeGen/riscv64-lp64d-abi.c
new file mode 100644
index 0000000000000..00967b5fca852
--- /dev/null
+++ b/clang/test/CodeGen/riscv64-lp64d-abi.c
@@ -0,0 +1,272 @@
+// RUN: %clang_cc1 -triple riscv64 -target-feature +d -target-abi lp64d -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+
+#include <stdint.h>
+
+// Verify that the tracking of used GPRs and FPRs works correctly by checking
+// that small integers are sign/zero extended when passed in registers.
+
+// Doubles are passed in FPRs, so argument 'i' will be passed zero-extended
+// because it will be passed in a GPR.
+
+// CHECK: define void @f_fpr_tracking(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, i8 zeroext %i)
+void f_fpr_tracking(double a, double b, double c, double d, double e, double f,
+                    double g, double h, uint8_t i) {}
+
+// Check that fp, fp+fp, and int+fp structs are lowered correctly. These will
+// be passed in FPR, FPR+FPR, or GPR+FPR regs if sufficient registers are
+// available and the widths are <= XLEN and FLEN, and should be expanded to
+// separate arguments in IR. They are passed by the same rules for returns,
+// but will be lowered to simple two-element structs if necessary (as LLVM IR
+// functions cannot return multiple values).
+
+// A struct containing just one floating-point real is passed as though it
+// were a standalone floating-point real.
+
+struct double_s { double f; };
+
+// CHECK: define void @f_double_s_arg(double)
+void f_double_s_arg(struct double_s a) {}
+
+// CHECK: define double @f_ret_double_s()
+struct double_s f_ret_double_s() {
+  return (struct double_s){1.0};
+}
+
+// A struct containing a double and any number of zero-width bitfields is
+// passed as though it were a standalone floating-point real.
+
+struct zbf_double_s { int : 0; double f; };
+struct zbf_double_zbf_s { int : 0; double f; int : 0; };
+
+// CHECK: define void @f_zbf_double_s_arg(double)
+void f_zbf_double_s_arg(struct zbf_double_s a) {}
+
+// CHECK: define double @f_ret_zbf_double_s()
+struct zbf_double_s f_ret_zbf_double_s() {
+  return (struct zbf_double_s){1.0};
+}
+
+// CHECK: define void @f_zbf_double_zbf_s_arg(double)
+void f_zbf_double_zbf_s_arg(struct zbf_double_zbf_s a) {}
+
+// CHECK: define double @f_ret_zbf_double_zbf_s()
+struct zbf_double_zbf_s f_ret_zbf_double_zbf_s() {
+  return (struct zbf_double_zbf_s){1.0};
+}
+
+// Check that structs containing two floating point values (width <= FLEN) are
+// expanded provided sufficient FPRs are available.
+
+struct double_double_s { double f; double g; };
+struct double_float_s { double f; float g; };
+
+// CHECK: define void @f_double_double_s_arg(double, double)
+void f_double_double_s_arg(struct double_double_s a) {}
+
+// CHECK: define { double, double } @f_ret_double_double_s()
+struct double_double_s f_ret_double_double_s() {
+  return (struct double_double_s){1.0, 2.0};
+}
+
+// CHECK: define void @f_double_float_s_arg(double, float)
+void f_double_float_s_arg(struct double_float_s a) {}
+
+// CHECK: define { double, float } @f_ret_double_float_s()
+struct double_float_s f_ret_double_float_s() {
+  return (struct double_float_s){1.0, 2.0};
+}
+
+// CHECK: define void @f_double_double_s_arg_insufficient_fprs(float %a, double %b, double %c, double %d, double %e, double %f, double %g, [2 x i64] %h.coerce)
+void f_double_double_s_arg_insufficient_fprs(float a, double b, double c, double d,
+    double e, double f, double g, struct double_double_s h) {}
+
+// Check that structs containing int+double values are expanded, provided
+// sufficient FPRs and GPRs are available. The integer components are neither
+// sign- nor zero-extended.
+
+struct double_int8_s { double f; int8_t i; };
+struct double_uint8_s { double f; uint8_t i; };
+struct double_int32_s { double f; int32_t i; };
+struct double_int64_s { double f; int64_t i; };
+struct double_int128bf_s { double f; __int128_t i : 64; };
+struct double_int8_zbf_s { double f; int8_t i; int : 0; };
+
+// CHECK: define void @f_double_int8_s_arg(double, i8)
+void f_double_int8_s_arg(struct double_int8_s a) {}
+
+// CHECK: define { double, i8 } @f_ret_double_int8_s()
+struct double_int8_s f_ret_double_int8_s() {
+  return (struct double_int8_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_uint8_s_arg(double, i8)
+void f_double_uint8_s_arg(struct double_uint8_s a) {}
+
+// CHECK: define { double, i8 } @f_ret_double_uint8_s()
+struct double_uint8_s f_ret_double_uint8_s() {
+  return (struct double_uint8_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_int32_s_arg(double, i32)
+void f_double_int32_s_arg(struct double_int32_s a) {}
+
+// CHECK: define { double, i32 } @f_ret_double_int32_s()
+struct double_int32_s f_ret_double_int32_s() {
+  return (struct double_int32_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_int64_s_arg(double, i64)
+void f_double_int64_s_arg(struct double_int64_s a) {}
+
+// CHECK: define { double, i64 } @f_ret_double_int64_s()
+struct double_int64_s f_ret_double_int64_s() {
+  return (struct double_int64_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_int128bf_s_arg(double, i64)
+void f_double_int128bf_s_arg(struct double_int128bf_s a) {}
+
+// CHECK: define { double, i64 } @f_ret_double_int128bf_s()
+struct double_int128bf_s f_ret_double_int128bf_s() {
+  return (struct double_int128bf_s){1.0, 2};
+}
+
+// The trailing zero-width bitfield does not change the lowering: the struct is
+// still expanded to double and i8, the same as double_int8_s.
+
+// CHECK: define void @f_double_int8_zbf_s(double, i8)
+void f_double_int8_zbf_s(struct double_int8_zbf_s a) {}
+
+// CHECK: define { double, i8 } @f_ret_double_int8_zbf_s()
+struct double_int8_zbf_s f_ret_double_int8_zbf_s() {
+  return (struct double_int8_zbf_s){1.0, 2};
+}
+
+// CHECK: define void @f_double_int8_s_arg_insufficient_gprs(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h, [2 x i64] %i.coerce)
+void f_double_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e,
+                                          int f, int g, int h, struct double_int8_s i) {}
+
+// CHECK: define void @f_struct_double_int8_insufficient_fprs(float %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, [2 x i64] %i.coerce)
+void f_struct_double_int8_insufficient_fprs(float a, double b, double c, double d,
+                                           double e, double f, double g, double h, struct double_int8_s i) {}
+
+// Complex floating-point values or structs containing a single complex
+// floating-point value should be passed as if it were an fp+fp struct.
+
+// CHECK: define void @f_doublecomplex(double %a.coerce0, double %a.coerce1)
+void f_doublecomplex(double __complex__ a) {}
+
+// CHECK: define { double, double } @f_ret_doublecomplex()
+double __complex__ f_ret_doublecomplex() {
+  return 1.0;
+}
+
+struct doublecomplex_s { double __complex__ c; };
+
+// CHECK: define void @f_doublecomplex_s_arg(double, double)
+void f_doublecomplex_s_arg(struct doublecomplex_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublecomplex_s()
+struct doublecomplex_s f_ret_doublecomplex_s() {
+  return (struct doublecomplex_s){1.0};
+}
+
+// Test single or two-element structs that need flattening. e.g. those
+// containing nested structs, doubles in small arrays, zero-length structs etc.
+
+struct doublearr1_s { double a[1]; };
+
+// CHECK: define void @f_doublearr1_s_arg(double)
+void f_doublearr1_s_arg(struct doublearr1_s a) {}
+
+// CHECK: define double @f_ret_doublearr1_s()
+struct doublearr1_s f_ret_doublearr1_s() {
+  return (struct doublearr1_s){{1.0}};
+}
+
+struct doublearr2_s { double a[2]; };
+
+// CHECK: define void @f_doublearr2_s_arg(double, double)
+void f_doublearr2_s_arg(struct doublearr2_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_s()
+struct doublearr2_s f_ret_doublearr2_s() {
+  return (struct doublearr2_s){{1.0, 2.0}};
+}
+
+struct doublearr2_tricky1_s { struct { double f[1]; } g[2]; };
+
+// CHECK: define void @f_doublearr2_tricky1_s_arg(double, double)
+void f_doublearr2_tricky1_s_arg(struct doublearr2_tricky1_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_tricky1_s()
+struct doublearr2_tricky1_s f_ret_doublearr2_tricky1_s() {
+  return (struct doublearr2_tricky1_s){{{{1.0}}, {{2.0}}}};
+}
+
+struct doublearr2_tricky2_s { struct {}; struct { double f[1]; } g[2]; };
+
+// CHECK: define void @f_doublearr2_tricky2_s_arg(double, double)
+void f_doublearr2_tricky2_s_arg(struct doublearr2_tricky2_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_tricky2_s()
+struct doublearr2_tricky2_s f_ret_doublearr2_tricky2_s() {
+  return (struct doublearr2_tricky2_s){{}, {{{1.0}}, {{2.0}}}};
+}
+
+struct doublearr2_tricky3_s { union {}; struct { double f[1]; } g[2]; };
+
+// CHECK: define void @f_doublearr2_tricky3_s_arg(double, double)
+void f_doublearr2_tricky3_s_arg(struct doublearr2_tricky3_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_tricky3_s()
+struct doublearr2_tricky3_s f_ret_doublearr2_tricky3_s() {
+  return (struct doublearr2_tricky3_s){{}, {{{1.0}}, {{2.0}}}};
+}
+
+struct doublearr2_tricky4_s { union {}; struct { struct {}; double f[1]; } g[2]; };
+
+// CHECK: define void @f_doublearr2_tricky4_s_arg(double, double)
+void f_doublearr2_tricky4_s_arg(struct doublearr2_tricky4_s a) {}
+
+// CHECK: define { double, double } @f_ret_doublearr2_tricky4_s()
+struct doublearr2_tricky4_s f_ret_doublearr2_tricky4_s() {
+  return (struct doublearr2_tricky4_s){{}, {{{}, {1.0}}, {{}, {2.0}}}};
+}
+
+// Test structs that should be passed according to the normal integer calling
+// convention.
+
+struct int_double_int_s { int a; double b; int c; };
+
+// CHECK: define void @f_int_double_int_s_arg(%struct.int_double_int_s* %a)
+void f_int_double_int_s_arg(struct int_double_int_s a) {}
+
+// CHECK: define void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret %agg.result)
+struct int_double_int_s f_ret_int_double_int_s() {
+  return (struct int_double_int_s){1, 2.0, 3};
+}
+
+struct char_char_double_s { char a; char b; double c; };
+
+// CHECK-LABEL: define void @f_char_char_double_s_arg([2 x i64] %a.coerce)
+void f_char_char_double_s_arg(struct char_char_double_s a) {}
+
+// CHECK: define [2 x i64] @f_ret_char_char_double_s()
+struct char_char_double_s f_ret_char_char_double_s() {
+  return (struct char_char_double_s){1, 2, 3.0};
+}
+
+// Unions are always passed according to the integer calling convention, even
+// if they can only contain a double.
+
+union double_u { double a; };
+
+// CHECK: define void @f_double_u_arg(i64 %a.coerce)
+void f_double_u_arg(union double_u a) {}
+
+// CHECK: define i64 @f_ret_double_u()
+union double_u f_ret_double_u() {
+  return (union double_u){1.0};
+}
diff --git a/clang/test/CodeGen/riscv64-lp64f-lp64d-abi.c b/clang/test/CodeGen/riscv64-lp64f-lp64d-abi.c
new file mode 100644
index 0000000000000..eee2bc1bdcc6f
--- /dev/null
+++ b/clang/test/CodeGen/riscv64-lp64f-lp64d-abi.c
@@ -0,0 +1,265 @@
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-abi lp64f -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +d -target-abi lp64d -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+
+#include <stdint.h>
+
+// Verify that the tracking of used GPRs and FPRs works correctly by checking
+// that small integers are sign/zero extended when passed in registers.
+
+// Floats are passed in FPRs, so argument 'i' will be passed zero-extended
+// because it will be passed in a GPR.
+
+// CHECK: define void @f_fpr_tracking(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, i8 zeroext %i)
+void f_fpr_tracking(float a, float b, float c, float d, float e, float f,
+                    float g, float h, uint8_t i) {}
+
+// Check that fp, fp+fp, and int+fp structs are lowered correctly. These will
+// be passed in FPR, FPR+FPR, or GPR+FPR regs if sufficient registers are
+// available and the widths are <= XLEN and FLEN, and should be expanded to
+// separate arguments in IR. They are passed by the same rules for returns,
+// but will be lowered to simple two-element structs if necessary (as LLVM IR
+// functions cannot return multiple values).
+
+// A struct containing just one floating-point real is passed as though it
+// were a standalone floating-point real.
+
+struct float_s { float f; };
+
+// CHECK: define void @f_float_s_arg(float)
+void f_float_s_arg(struct float_s a) {}
+
+// CHECK: define float @f_ret_float_s()
+struct float_s f_ret_float_s() {
+  return (struct float_s){1.0};
+}
+
+// A struct containing a float and any number of zero-width bitfields is
+// passed as though it were a standalone floating-point real.
+
+struct zbf_float_s { int : 0; float f; };
+struct zbf_float_zbf_s { int : 0; float f; int : 0; };
+
+// CHECK: define void @f_zbf_float_s_arg(float)
+void f_zbf_float_s_arg(struct zbf_float_s a) {}
+
+// CHECK: define float @f_ret_zbf_float_s()
+struct zbf_float_s f_ret_zbf_float_s() {
+  return (struct zbf_float_s){1.0};
+}
+
+// CHECK: define void @f_zbf_float_zbf_s_arg(float)
+void f_zbf_float_zbf_s_arg(struct zbf_float_zbf_s a) {}
+
+// CHECK: define float @f_ret_zbf_float_zbf_s()
+struct zbf_float_zbf_s f_ret_zbf_float_zbf_s() {
+  return (struct zbf_float_zbf_s){1.0};
+}
+
+// Check that structs containing two float values (width <= FLEN) are expanded
+// provided sufficient FPRs are available.
+
+struct float_float_s { float f; float g; };
+
+// CHECK: define void @f_float_float_s_arg(float, float)
+void f_float_float_s_arg(struct float_float_s a) {}
+
+// CHECK: define { float, float } @f_ret_float_float_s()
+struct float_float_s f_ret_float_float_s() {
+  return (struct float_float_s){1.0, 2.0};
+}
+
+// CHECK: define void @f_float_float_s_arg_insufficient_fprs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, i64 %h.coerce)
+void f_float_float_s_arg_insufficient_fprs(float a, float b, float c, float d,
+    float e, float f, float g, struct float_float_s h) {}
+
+// Check that structs containing int+float values are expanded, provided
+// sufficient FPRs and GPRs are available. The integer components are neither
+// sign- nor zero-extended.
+
+struct float_int8_s { float f; int8_t i; };
+struct float_uint8_s { float f; uint8_t i; };
+struct float_int32_s { float f; int32_t i; };
+struct float_int64_s { float f; int64_t i; };
+struct float_int128bf_s { float f; __int128_t i : 64; };
+struct float_int8_zbf_s { float f; int8_t i; int : 0; };
+
+// CHECK: define void @f_float_int8_s_arg(float, i8)
+void f_float_int8_s_arg(struct float_int8_s a) {}
+
+// CHECK: define { float, i8 } @f_ret_float_int8_s()
+struct float_int8_s f_ret_float_int8_s() {
+  return (struct float_int8_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_uint8_s_arg(float, i8)
+void f_float_uint8_s_arg(struct float_uint8_s a) {}
+
+// CHECK: define { float, i8 } @f_ret_float_uint8_s()
+struct float_uint8_s f_ret_float_uint8_s() {
+  return (struct float_uint8_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_int32_s_arg(float, i32)
+void f_float_int32_s_arg(struct float_int32_s a) {}
+
+// CHECK: define { float, i32 } @f_ret_float_int32_s()
+struct float_int32_s f_ret_float_int32_s() {
+  return (struct float_int32_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_int64_s_arg(float, i64)
+void f_float_int64_s_arg(struct float_int64_s a) {}
+
+// CHECK: define { float, i64 } @f_ret_float_int64_s()
+struct float_int64_s f_ret_float_int64_s() {
+  return (struct float_int64_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_int128bf_s_arg(float, i64)
+void f_float_int128bf_s_arg(struct float_int128bf_s a) {}
+
+// CHECK: define <{ float, i64 }> @f_ret_float_int128bf_s()
+struct float_int128bf_s f_ret_float_int128bf_s() {
+  return (struct float_int128bf_s){1.0, 2};
+}
+
+// The trailing zero-width bitfield does not change the lowering: the struct is
+// still expanded to float and i8, the same as float_int8_s.
+
+// CHECK: define void @f_float_int8_zbf_s(float, i8)
+void f_float_int8_zbf_s(struct float_int8_zbf_s a) {}
+
+// CHECK: define { float, i8 } @f_ret_float_int8_zbf_s()
+struct float_int8_zbf_s f_ret_float_int8_zbf_s() {
+  return (struct float_int8_zbf_s){1.0, 2};
+}
+
+// CHECK: define void @f_float_int8_s_arg_insufficient_gprs(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h, i64 %i.coerce)
+void f_float_int8_s_arg_insufficient_gprs(int a, int b, int c, int d, int e,
+                                          int f, int g, int h, struct float_int8_s i) {}
+
+// CHECK: define void @f_struct_float_int8_insufficient_fprs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, i64 %i.coerce)
+void f_struct_float_int8_insufficient_fprs(float a, float b, float c, float d,
+                                           float e, float f, float g, float h, struct float_int8_s i) {}
+
+// Complex floating-point values or structs containing a single complex
+// floating-point value should be passed as if it were an fp+fp struct.
+
+// CHECK: define void @f_floatcomplex(float %a.coerce0, float %a.coerce1)
+void f_floatcomplex(float __complex__ a) {}
+
+// CHECK: define { float, float } @f_ret_floatcomplex()
+float __complex__ f_ret_floatcomplex() {
+  return 1.0;
+}
+
+struct floatcomplex_s { float __complex__ c; };
+
+// CHECK: define void @f_floatcomplex_s_arg(float, float)
+void f_floatcomplex_s_arg(struct floatcomplex_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatcomplex_s()
+struct floatcomplex_s f_ret_floatcomplex_s() {
+  return (struct floatcomplex_s){1.0};
+}
+
+// Test single or two-element structs that need flattening. e.g. those
+// containing nested structs, floats in small arrays, zero-length structs etc.
+
+struct floatarr1_s { float a[1]; };
+
+// CHECK: define void @f_floatarr1_s_arg(float)
+void f_floatarr1_s_arg(struct floatarr1_s a) {}
+
+// CHECK: define float @f_ret_floatarr1_s()
+struct floatarr1_s f_ret_floatarr1_s() {
+  return (struct floatarr1_s){{1.0}};
+}
+
+struct floatarr2_s { float a[2]; };
+
+// CHECK: define void @f_floatarr2_s_arg(float, float)
+void f_floatarr2_s_arg(struct floatarr2_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_s()
+struct floatarr2_s f_ret_floatarr2_s() {
+  return (struct floatarr2_s){{1.0, 2.0}};
+}
+
+struct floatarr2_tricky1_s { struct { float f[1]; } g[2]; };
+
+// CHECK: define void @f_floatarr2_tricky1_s_arg(float, float)
+void f_floatarr2_tricky1_s_arg(struct floatarr2_tricky1_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_tricky1_s()
+struct floatarr2_tricky1_s f_ret_floatarr2_tricky1_s() {
+  return (struct floatarr2_tricky1_s){{{{1.0}}, {{2.0}}}};
+}
+
+struct floatarr2_tricky2_s { struct {}; struct { float f[1]; } g[2]; };
+
+// CHECK: define void @f_floatarr2_tricky2_s_arg(float, float)
+void f_floatarr2_tricky2_s_arg(struct floatarr2_tricky2_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_tricky2_s()
+struct floatarr2_tricky2_s f_ret_floatarr2_tricky2_s() {
+  return (struct floatarr2_tricky2_s){{}, {{{1.0}}, {{2.0}}}};
+}
+
+struct floatarr2_tricky3_s { union {}; struct { float f[1]; } g[2]; };
+
+// CHECK: define void @f_floatarr2_tricky3_s_arg(float, float)
+void f_floatarr2_tricky3_s_arg(struct floatarr2_tricky3_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_tricky3_s()
+struct floatarr2_tricky3_s f_ret_floatarr2_tricky3_s() {
+  return (struct floatarr2_tricky3_s){{}, {{{1.0}}, {{2.0}}}};
+}
+
+struct floatarr2_tricky4_s { union {}; struct { struct {}; float f[1]; } g[2]; };
+
+// CHECK: define void @f_floatarr2_tricky4_s_arg(float, float)
+void f_floatarr2_tricky4_s_arg(struct floatarr2_tricky4_s a) {}
+
+// CHECK: define { float, float } @f_ret_floatarr2_tricky4_s()
+struct floatarr2_tricky4_s f_ret_floatarr2_tricky4_s() {
+  return (struct floatarr2_tricky4_s){{}, {{{}, {1.0}}, {{}, {2.0}}}};
+}
+
+// Test structs that should be passed according to the normal integer calling
+// convention.
+
+struct int_float_int_s { int a; float b; int c; };
+
+// CHECK: define void @f_int_float_int_s_arg([2 x i64] %a.coerce)
+void f_int_float_int_s_arg(struct int_float_int_s a) {}
+
+// CHECK: define [2 x i64] @f_ret_int_float_int_s()
+struct int_float_int_s f_ret_int_float_int_s() {
+  return (struct int_float_int_s){1, 2.0, 3};
+}
+
+struct char_char_float_s { char a; char b; float c; };
+
+// CHECK-LABEL: define void @f_char_char_float_s_arg(i64 %a.coerce)
+void f_char_char_float_s_arg(struct char_char_float_s a) {}
+
+// CHECK: define i64 @f_ret_char_char_float_s()
+struct char_char_float_s f_ret_char_char_float_s() {
+  return (struct char_char_float_s){1, 2, 3.0};
+}
+
+// Unions are always passed according to the integer calling convention, even
+// if they can only contain a float.
+
+union float_u { float a; };
+
+// CHECK: define void @f_float_u_arg(i64 %a.coerce)
+void f_float_u_arg(union float_u a) {}
+
+// CHECK: define i64 @f_ret_float_u()
+union float_u f_ret_float_u() {
+  return (union float_u){1.0};
+}
diff --git a/clang/test/Driver/riscv-abi.c b/clang/test/Driver/riscv-abi.c
index 6a97ff671ddb0..1a4c7ed477b6d 100644
--- a/clang/test/Driver/riscv-abi.c
+++ b/clang/test/Driver/riscv-abi.c
@@ -9,17 +9,15 @@
 
 // CHECK-ILP32: "-target-abi" "ilp32"
 
-// TODO: ilp32f support.
-// RUN: not %clang -target riscv32-unknown-elf %s -o %t.o -mabi=ilp32f 2>&1 \
+// RUN: %clang -target riscv32-unknown-elf %s -### -o %t.o -march=rv32if -mabi=ilp32f 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-ILP32F %s
 
-// CHECK-ILP32F: error: unknown target ABI 'ilp32f'
+// CHECK-ILP32F: "-target-abi" "ilp32f"
 
-// TODO: ilp32d support.
-// RUN: not %clang -target riscv32-unknown-elf %s -o %t.o -mabi=ilp32d 2>&1 \
+// RUN: %clang -target riscv32-unknown-elf %s -### -o %t.o -march=rv32ifd -mabi=ilp32d 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-ILP32D %s
 
-// CHECK-ILP32D: error: unknown target ABI 'ilp32d'
+// CHECK-ILP32D: "-target-abi" "ilp32d"
 
 // RUN: not %clang -target riscv32-unknown-elf %s -o %t.o -mabi=lp64 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-RV32-LP64 %s
@@ -37,17 +35,15 @@
 
 // CHECK-LP64: "-target-abi" "lp64"
 
-// TODO: lp64f support.
-// RUN: not %clang -target riscv64-unknown-elf %s -o %t.o -mabi=lp64f 2>&1 \
+// RUN:  %clang -target riscv64-unknown-elf %s -### -o %t.o -march=rv64f -mabi=lp64f 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-LP64F %s
 
-// CHECK-LP64F: error: unknown target ABI 'lp64f'
+// CHECK-LP64F: "-target-abi" "lp64f"
 
-// TODO: lp64d support.
-// RUN: not %clang -target riscv64-unknown-elf %s -o %t.o -mabi=lp64d 2>&1 \
+// RUN: %clang -target riscv64-unknown-elf %s -### -o %t.o -march=rv64d -mabi=lp64d 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-LP64D %s
 
-// CHECK-LP64D: error: unknown target ABI 'lp64d'
+// CHECK-LP64D: "-target-abi" "lp64d"
 
 // RUN: not %clang -target riscv64-unknown-elf %s -o %t.o -mabi=ilp32 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-RV64-ILP32 %s
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index 2c63e0fa29dfb..36e49c36f0330 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -47,3 +47,27 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ic -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-C-EXT %s
 // CHECK-C-EXT: __riscv_compressed 1
+
+// RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32ifd -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-SOFT %s
+// RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ifd -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-SOFT %s
+// CHECK-SOFT: __riscv_float_abi_soft 1
+// CHECK-SOFT-NOT: __riscv_float_abi_single
+// CHECK-SOFT-NOT: __riscv_float_abi_double
+
+// RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32ifd -mabi=ilp32f -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-SINGLE %s
+// RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ifd -mabi=lp64f -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-SINGLE %s
+// CHECK-SINGLE: __riscv_float_abi_single 1
+// CHECK-SINGLE-NOT: __riscv_float_abi_soft
+// CHECK-SINGLE-NOT: __riscv_float_abi_double
+
+// RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32ifd -mabi=ilp32d -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-DOUBLE %s
+// RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ifd -mabi=lp64d -x c -E -dM %s \
+// RUN: -o - | FileCheck --check-prefix=CHECK-DOUBLE %s
+// CHECK-DOUBLE: __riscv_float_abi_double 1
+// CHECK-DOUBLE-NOT: __riscv_float_abi_soft
+// CHECK-DOUBLE-NOT: __riscv_float_abi_single

From 1931d3cb20a00da732c5210b123656632982fde0 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Fri, 19 Jul 2019 09:55:32 +0000
Subject: [PATCH 451/451] Merging r366511:
 ------------------------------------------------------------------------
 r366511 | lhames | 2019-07-19 00:47:18 +0200 (Fri, 19 Jul 2019) | 3 lines

Update the SimpleJIT class in the clang-interpreter example to use ORCv2.

This will remove the ORCv1 deprecation warnings.
------------------------------------------------------------------------

llvm-svn: 366556
---
 clang/examples/clang-interpreter/main.cpp | 97 +++++++++++++----------
 1 file changed, 53 insertions(+), 44 deletions(-)

diff --git a/clang/examples/clang-interpreter/main.cpp b/clang/examples/clang-interpreter/main.cpp
index 8fb52700a757e..69808428a34da 100644
--- a/clang/examples/clang-interpreter/main.cpp
+++ b/clang/examples/clang-interpreter/main.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
 #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
 #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
@@ -50,65 +51,69 @@ namespace orc {
 class SimpleJIT {
 private:
   ExecutionSession ES;
-  std::shared_ptr<SymbolResolver> Resolver;
   std::unique_ptr<TargetMachine> TM;
   const DataLayout DL;
-  LegacyRTDyldObjectLinkingLayer ObjectLayer;
-  LegacyIRCompileLayer<decltype(ObjectLayer), SimpleCompiler> CompileLayer;
+  MangleAndInterner Mangle{ES, DL};
+  RTDyldObjectLinkingLayer ObjectLayer{ES, createMemMgr};
+  IRCompileLayer CompileLayer{ES, ObjectLayer, SimpleCompiler(*TM)};
 
-public:
-  SimpleJIT()
-      : Resolver(createLegacyLookupResolver(
-            ES,
-            [this](const std::string &Name) -> JITSymbol {
-              if (auto Sym = CompileLayer.findSymbol(Name, false))
-                return Sym;
-              else if (auto Err = Sym.takeError())
-                return std::move(Err);
-              if (auto SymAddr =
-                      RTDyldMemoryManager::getSymbolAddressInProcess(Name))
-                return JITSymbol(SymAddr, JITSymbolFlags::Exported);
-              return nullptr;
-            },
-            [](Error Err) { cantFail(std::move(Err), "lookupFlags failed"); })),
-        TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()),
-        ObjectLayer(ES,
-                    [this](VModuleKey) {
-                      return LegacyRTDyldObjectLinkingLayer::Resources{
-                          std::make_shared<SectionMemoryManager>(), Resolver};
-                    }),
-        CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {
+  static std::unique_ptr<SectionMemoryManager> createMemMgr() {
+    return llvm::make_unique<SectionMemoryManager>();
+  }
+
+  SimpleJIT(std::unique_ptr<TargetMachine> TM, DataLayout DL,
+            DynamicLibrarySearchGenerator ProcessSymbolsGenerator)
+      : TM(std::move(TM)), DL(std::move(DL)) {
     llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
+    ES.getMainJITDylib().setGenerator(std::move(ProcessSymbolsGenerator));
   }
 
-  const TargetMachine &getTargetMachine() const { return *TM; }
+public:
+  static Expected<std::unique_ptr<SimpleJIT>> Create() {
+    auto JTMB = JITTargetMachineBuilder::detectHost();
+    if (!JTMB)
+      return JTMB.takeError();
+
+    auto TM = JTMB->createTargetMachine();
+    if (!TM)
+      return TM.takeError();
+
+    auto DL = (*TM)->createDataLayout();
 
-  VModuleKey addModule(std::unique_ptr<Module> M) {
-    // Add the module to the JIT with a new VModuleKey.
-    auto K = ES.allocateVModule();
-    cantFail(CompileLayer.addModule(K, std::move(M)));
-    return K;
+    auto ProcessSymbolsGenerator =
+        DynamicLibrarySearchGenerator::GetForCurrentProcess(
+            DL.getGlobalPrefix());
+
+    if (!ProcessSymbolsGenerator)
+      return ProcessSymbolsGenerator.takeError();
+
+    return std::unique_ptr<SimpleJIT>(new SimpleJIT(
+        std::move(*TM), std::move(DL), std::move(*ProcessSymbolsGenerator)));
   }
 
-  JITSymbol findSymbol(const StringRef &Name) {
-    std::string MangledName;
-    raw_string_ostream MangledNameStream(MangledName);
-    Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
-    return CompileLayer.findSymbol(MangledNameStream.str(), true);
+  const TargetMachine &getTargetMachine() const { return *TM; }
+
+  Error addModule(ThreadSafeModule M) {
+    return CompileLayer.add(ES.getMainJITDylib(), std::move(M));
   }
 
-  JITTargetAddress getSymbolAddress(const StringRef &Name) {
-    return cantFail(findSymbol(Name).getAddress());
+  Expected<JITEvaluatedSymbol> findSymbol(const StringRef &Name) {
+    return ES.lookup({&ES.getMainJITDylib()}, Mangle(Name));
   }
 
-  void removeModule(VModuleKey K) {
-    cantFail(CompileLayer.removeModule(K));
+  Expected<JITTargetAddress> getSymbolAddress(const StringRef &Name) {
+    auto Sym = findSymbol(Name);
+    if (!Sym)
+      return Sym.takeError();
+    return Sym->getAddress();
   }
 };
 
 } // end namespace orc
 } // end namespace llvm
 
+llvm::ExitOnError ExitOnErr;
+
 int main(int argc, const char **argv) {
   // This just needs to be some symbol in the binary; C++ doesn't
   // allow taking the address of ::main however.
@@ -130,6 +135,8 @@ int main(int argc, const char **argv) {
     T.setObjectFormat(llvm::Triple::ELF);
 #endif
 
+  ExitOnErr.setBanner("clang interpreter");
+
   Driver TheDriver(Path, T.str(), Diags);
   TheDriver.setTitle("clang interpreter");
   TheDriver.setCheckInputsExist(false);
@@ -204,14 +211,16 @@ int main(int argc, const char **argv) {
   llvm::InitializeNativeTargetAsmPrinter();
 
   int Res = 255;
+  std::unique_ptr<llvm::LLVMContext> Ctx(Act->takeLLVMContext());
   std::unique_ptr<llvm::Module> Module = Act->takeModule();
 
   if (Module) {
-    llvm::orc::SimpleJIT J;
-    auto H = J.addModule(std::move(Module));
-    auto Main = (int(*)(...))J.getSymbolAddress("main");
+    auto J = ExitOnErr(llvm::orc::SimpleJIT::Create());
+
+    ExitOnErr(J->addModule(
+        llvm::orc::ThreadSafeModule(std::move(Module), std::move(Ctx))));
+    auto Main = (int (*)(...))ExitOnErr(J->getSymbolAddress("main"));
     Res = Main();
-    J.removeModule(H);
   }
 
   // Shutdown.