@@ -229,11 +229,11 @@ static MVT scaleVectorType(MVT VT) {
229
229
VT.getVectorNumElements () / 2 );
230
230
}
231
231
232
- static uint32_t Concat[] = {
233
- 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ,
234
- 16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 ,
235
- 32 , 33 , 34 , 35 , 36 , 37 , 38 , 39 , 40 , 41 , 42 , 43 , 44 , 45 , 46 , 47 ,
236
- 48 , 49 , 50 , 51 , 52 , 53 , 54 , 55 , 56 , 57 , 58 , 59 , 60 , 61 , 62 , 63 };
232
+ static constexpr int Concat[] = {
233
+ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ,
234
+ 16 , 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 ,
235
+ 32 , 33 , 34 , 35 , 36 , 37 , 38 , 39 , 40 , 41 , 42 , 43 , 44 , 45 , 46 , 47 ,
236
+ 48 , 49 , 50 , 51 , 52 , 53 , 54 , 55 , 56 , 57 , 58 , 59 , 60 , 61 , 62 , 63 };
237
237
238
238
// genShuffleBland - Creates a shuffle according to two vectors. This function
239
239
// only works on instructions with lanes inside 256-bit registers. According to
@@ -251,9 +251,9 @@ static uint32_t Concat[] = {
251
251
// By computing the shuffle on a sequence of 16 elements(one lane) and add the
252
252
// correct offset. We are creating a vpshufd + blend sequence between two
253
253
// shuffles.
254
- static void genShuffleBland (MVT VT, ArrayRef<uint32_t > Mask,
255
- SmallVectorImpl<uint32_t > &Out, int LowOffset,
256
- int HighOffset) {
254
+ static void genShuffleBland (MVT VT, ArrayRef<int > Mask,
255
+ SmallVectorImpl<int > &Out, int LowOffset,
256
+ int HighOffset) {
257
257
assert (VT.getSizeInBits () >= 256 &&
258
258
" This function doesn't accept width smaller then 256" );
259
259
unsigned NumOfElm = VT.getVectorNumElements ();
@@ -282,9 +282,9 @@ static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask,
282
282
// Invec[2] - |2|5|8|11| TransposedMatrix[2] - |8|9|10|11|
283
283
284
284
static void reorderSubVector (MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
285
- ArrayRef<Value *> Vec, ArrayRef<uint32_t > VPShuf,
286
- unsigned VecElems, unsigned Stride,
287
- IRBuilder<> &Builder) {
285
+ ArrayRef<Value *> Vec, ArrayRef<int > VPShuf,
286
+ unsigned VecElems, unsigned Stride,
287
+ IRBuilder<> &Builder) {
288
288
289
289
if (VecElems == 16 ) {
290
290
for (unsigned i = 0 ; i < Stride; i++)
@@ -293,7 +293,7 @@ static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
293
293
return ;
294
294
}
295
295
296
- SmallVector<uint32_t , 32 > OptimizeShuf;
296
+ SmallVector<int , 32 > OptimizeShuf;
297
297
Value *Temp[8 ];
298
298
299
299
for (unsigned i = 0 ; i < (VecElems / 16 ) * Stride; i += 2 ) {
@@ -433,7 +433,7 @@ void X86InterleavedAccessGroup::interleave8bitStride4(
433
433
// For example shuffle pattern for VF 16 register size 256 -> lanes = 2
434
434
// {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>}
435
435
static void createShuffleStride (MVT VT, int Stride,
436
- SmallVectorImpl<uint32_t > &Mask) {
436
+ SmallVectorImpl<int > &Mask) {
437
437
int VectorSize = VT.getSizeInBits ();
438
438
int VF = VT.getVectorNumElements ();
439
439
int LaneCount = std::max (VectorSize / 128 , 1 );
@@ -446,7 +446,7 @@ static void createShuffleStride(MVT VT, int Stride,
446
446
// inside mask a shuffleMask. A mask contains exactly 3 groups, where
447
447
// each group is a monotonically increasing sequence with stride 3.
448
448
// For example shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2}
449
- static void setGroupSize (MVT VT, SmallVectorImpl<uint32_t > &SizeInfo) {
449
+ static void setGroupSize (MVT VT, SmallVectorImpl<int > &SizeInfo) {
450
450
int VectorSize = VT.getSizeInBits ();
451
451
int VF = VT.getVectorNumElements () / std::max (VectorSize / 128 , 1 );
452
452
for (int i = 0 , FirstGroupElement = 0 ; i < 3 ; i++) {
@@ -470,7 +470,7 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
470
470
// direction of the alignment. (false - align to the "right" side while true -
471
471
// align to the "left" side)
472
472
static void DecodePALIGNRMask (MVT VT, unsigned Imm,
473
- SmallVectorImpl<uint32_t > &ShuffleMask,
473
+ SmallVectorImpl<int > &ShuffleMask,
474
474
bool AlignDirection = true , bool Unary = false ) {
475
475
unsigned NumElts = VT.getVectorNumElements ();
476
476
unsigned NumLanes = std::max ((int )VT.getSizeInBits () / 128 , 1 );
@@ -547,11 +547,11 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
547
547
// Matrix[2]= b5 c5 a6 b6 c6 a7 b7 c7
548
548
549
549
TransposedMatrix.resize (3 );
550
- SmallVector<uint32_t , 32 > VPShuf;
551
- SmallVector<uint32_t , 32 > VPAlign[2 ];
552
- SmallVector<uint32_t , 32 > VPAlign2;
553
- SmallVector<uint32_t , 32 > VPAlign3;
554
- SmallVector<uint32_t , 3 > GroupSize;
550
+ SmallVector<int , 32 > VPShuf;
551
+ SmallVector<int , 32 > VPAlign[2 ];
552
+ SmallVector<int , 32 > VPAlign2;
553
+ SmallVector<int , 32 > VPAlign3;
554
+ SmallVector<int , 3 > GroupSize;
555
555
Value *Vec[6 ], *TempVector[3 ];
556
556
557
557
MVT VT = MVT::getVT (Shuffles[0 ]->getType ());
@@ -605,8 +605,8 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
605
605
// group2Shuffle reorder the shuffle stride back into continuous order.
606
606
// For example For VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13} =>
607
607
// MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}.
608
- static void group2Shuffle (MVT VT, SmallVectorImpl<uint32_t > &Mask,
609
- SmallVectorImpl<uint32_t > &Output) {
608
+ static void group2Shuffle (MVT VT, SmallVectorImpl<int > &Mask,
609
+ SmallVectorImpl<int > &Output) {
610
610
int IndexGroup[3 ] = {0 , 0 , 0 };
611
611
int Index = 0 ;
612
612
int VectorWidth = VT.getSizeInBits ();
@@ -633,11 +633,11 @@ void X86InterleavedAccessGroup::interleave8bitStride3(
633
633
// Matrix[2]= c0 c1 c2 c3 c3 a7 b7 c7
634
634
635
635
TransposedMatrix.resize (3 );
636
- SmallVector<uint32_t , 3 > GroupSize;
637
- SmallVector<uint32_t , 32 > VPShuf;
638
- SmallVector<uint32_t , 32 > VPAlign[3 ];
639
- SmallVector<uint32_t , 32 > VPAlign2;
640
- SmallVector<uint32_t , 32 > VPAlign3;
636
+ SmallVector<int , 3 > GroupSize;
637
+ SmallVector<int , 32 > VPShuf;
638
+ SmallVector<int , 32 > VPAlign[3 ];
639
+ SmallVector<int , 32 > VPAlign2;
640
+ SmallVector<int , 32 > VPAlign3;
641
641
642
642
Value *Vec[3 ], *TempVector[3 ];
643
643
MVT VT = MVT::getVectorVT (MVT::i8, VecElems);
@@ -692,25 +692,25 @@ void X86InterleavedAccessGroup::transpose_4x4(
692
692
TransposedMatrix.resize (4 );
693
693
694
694
// dst = src1[0,1],src2[0,1]
695
- uint32_t IntMask1[] = {0 , 1 , 4 , 5 };
696
- ArrayRef<uint32_t > Mask = makeArrayRef (IntMask1, 4 );
695
+ static constexpr int IntMask1[] = {0 , 1 , 4 , 5 };
696
+ ArrayRef<int > Mask = makeArrayRef (IntMask1, 4 );
697
697
Value *IntrVec1 = Builder.CreateShuffleVector (Matrix[0 ], Matrix[2 ], Mask);
698
698
Value *IntrVec2 = Builder.CreateShuffleVector (Matrix[1 ], Matrix[3 ], Mask);
699
699
700
700
// dst = src1[2,3],src2[2,3]
701
- uint32_t IntMask2[] = {2 , 3 , 6 , 7 };
701
+ static constexpr int IntMask2[] = {2 , 3 , 6 , 7 };
702
702
Mask = makeArrayRef (IntMask2, 4 );
703
703
Value *IntrVec3 = Builder.CreateShuffleVector (Matrix[0 ], Matrix[2 ], Mask);
704
704
Value *IntrVec4 = Builder.CreateShuffleVector (Matrix[1 ], Matrix[3 ], Mask);
705
705
706
706
// dst = src1[0],src2[0],src1[2],src2[2]
707
- uint32_t IntMask3[] = {0 , 4 , 2 , 6 };
707
+ static constexpr int IntMask3[] = {0 , 4 , 2 , 6 };
708
708
Mask = makeArrayRef (IntMask3, 4 );
709
709
TransposedMatrix[0 ] = Builder.CreateShuffleVector (IntrVec1, IntrVec2, Mask);
710
710
TransposedMatrix[2 ] = Builder.CreateShuffleVector (IntrVec3, IntrVec4, Mask);
711
711
712
712
// dst = src1[1],src2[1],src1[3],src2[3]
713
- uint32_t IntMask4[] = {1 , 5 , 3 , 7 };
713
+ static constexpr int IntMask4[] = {1 , 5 , 3 , 7 };
714
714
Mask = makeArrayRef (IntMask4, 4 );
715
715
TransposedMatrix[1 ] = Builder.CreateShuffleVector (IntrVec1, IntrVec2, Mask);
716
716
TransposedMatrix[3 ] = Builder.CreateShuffleVector (IntrVec3, IntrVec4, Mask);
0 commit comments