Skip to content

Commit 6a1b119

Browse files
authored
[AMDGPU] Add intrinsics for atomic struct buffer loads (llvm#100140)
Mark these intrinsics as atomic loads within LLVM to prevent hoisting out of loops in cases where the load is considered invariant. Similar to llvm#97707, but for struct buffer loads.
1 parent 666e332 commit 6a1b119

6 files changed

+772
-3
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1200,6 +1200,23 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
12001200
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
12011201
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
12021202

1203+
class AMDGPUStructAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
1204+
[data_ty],
1205+
[llvm_v4i32_ty, // rsrc(SGPR)
1206+
llvm_i32_ty, // vindex(VGPR)
1207+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
1208+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
1209+
llvm_i32_ty], // auxiliary/cachepolicy(imm):
1210+
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
1211+
// bit 3 = swz, bit 4 = scc (gfx90a)
1212+
// gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1213+
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
1214+
// bit 6 = swz
1215+
// all: volatile op (bit 31, stripped at lowering)
1216+
[ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1217+
AMDGPURsrcIntrinsic<0>;
1218+
def int_amdgcn_struct_atomic_buffer_load : AMDGPUStructAtomicBufferLoad;
1219+
12031220
class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
12041221
[data_ty],
12051222
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
@@ -1219,6 +1236,24 @@ class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIn
12191236
def int_amdgcn_struct_ptr_buffer_load_format : AMDGPUStructPtrBufferLoad;
12201237
def int_amdgcn_struct_ptr_buffer_load : AMDGPUStructPtrBufferLoad;
12211238

1239+
class AMDGPUStructPtrAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
1240+
[data_ty],
1241+
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
1242+
llvm_i32_ty, // vindex(VGPR)
1243+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
1244+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
1245+
llvm_i32_ty], // auxiliary/cachepolicy(imm):
1246+
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
1247+
// bit 3 = swz, bit 4 = scc (gfx90a)
1248+
// gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1249+
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
1250+
// bit 6 = swz
1251+
// all: volatile op (bit 31, stripped at lowering)
1252+
[IntrArgMemOnly, NoCapture<ArgIndex<0>>,
1253+
ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
1254+
AMDGPURsrcIntrinsic<0>;
1255+
def int_amdgcn_struct_ptr_atomic_buffer_load : AMDGPUStructPtrAtomicBufferLoad;
1256+
12221257
class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
12231258
[],
12241259
[data_ty, // vdata(VGPR)

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7375,6 +7375,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73757375
case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
73767376
case Intrinsic::amdgcn_struct_buffer_load:
73777377
case Intrinsic::amdgcn_struct_ptr_buffer_load:
7378+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
7379+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
73787380
return legalizeBufferLoad(MI, MRI, B, false, false);
73797381
case Intrinsic::amdgcn_raw_buffer_load_format:
73807382
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5020,7 +5020,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
50205020
case Intrinsic::amdgcn_struct_buffer_load:
50215021
case Intrinsic::amdgcn_struct_ptr_buffer_load:
50225022
case Intrinsic::amdgcn_struct_tbuffer_load:
5023-
case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
5023+
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
5024+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
5025+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
50245026
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
50255027
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
50265028
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1278,7 +1278,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12781278
return true;
12791279
}
12801280
case Intrinsic::amdgcn_raw_atomic_buffer_load:
1281-
case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load: {
1281+
case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
1282+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
1283+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
12821284
Info.memVT =
12831285
memVTFromLoadIntrReturn(*this, MF.getDataLayout(), CI.getType(),
12841286
std::numeric_limits<unsigned>::max());
@@ -8925,7 +8927,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
89258927
case Intrinsic::amdgcn_struct_buffer_load:
89268928
case Intrinsic::amdgcn_struct_ptr_buffer_load:
89278929
case Intrinsic::amdgcn_struct_buffer_load_format:
8928-
case Intrinsic::amdgcn_struct_ptr_buffer_load_format: {
8930+
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
8931+
case Intrinsic::amdgcn_struct_atomic_buffer_load:
8932+
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
89298933
const bool IsFormat =
89308934
IntrID == Intrinsic::amdgcn_struct_buffer_load_format ||
89318935
IntrID == Intrinsic::amdgcn_struct_ptr_buffer_load_format;

0 commit comments

Comments
 (0)