Skip to content

Commit cfc76b6

Browse files
authored
[llvm][offload] Move AMDGPU offload utilities to LLVM (#102487)
This patch moves utilities from `offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h` to `llvm/Frontend/Offloading/Utility.h` to be reused by other projects. Concretely the following changes were made: - Rename `KernelMetaDataTy` to `AMDGPUKernelMetaData`. - Remove unused fields `KernelObject`, `KernelSegmentSize`, `ExplicitArgumentCount` and `ImplicitArgumentCount` from `AMDGPUKernelMetaData`. - Return the produced error if `ELFObj.sections()` failed instead of using `cantFail`. - Added `AGPRCount` field to `AMDGPUKernelMetaData`. - Added a default invalid value to all the fields in `AMDGPUKernelMetaData`.
1 parent 5f3c0b2 commit cfc76b6

File tree

6 files changed

+312
-262
lines changed

6 files changed

+312
-262
lines changed

llvm/include/llvm/Frontend/Offloading/Utility.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@
99
#ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H
1010
#define LLVM_FRONTEND_OFFLOADING_UTILITY_H
1111

12+
#include <cstdint>
13+
14+
#include "llvm/ADT/StringMap.h"
15+
#include "llvm/ADT/StringRef.h"
1216
#include "llvm/IR/Module.h"
1317
#include "llvm/Object/OffloadBinary.h"
18+
#include "llvm/Support/Error.h"
19+
#include "llvm/Support/MemoryBufferRef.h"
1420

1521
namespace llvm {
1622
namespace offloading {
@@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name,
7379
std::pair<GlobalVariable *, GlobalVariable *>
7480
getOffloadEntryArray(Module &M, StringRef SectionName);
7581

82+
namespace amdgpu {
83+
/// Check if an image is compatible with current system's environment. The
84+
/// system environment is given as a 'target-id' which has the form:
85+
///
86+
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
87+
///
88+
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
89+
/// and is compatible with either '+' or '-'. The HSA runtime returns this
90+
/// information using the target-id, while we use the ELF header to determine
91+
/// these features.
92+
bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
93+
StringRef EnvTargetID);
94+
95+
/// Struct for holding metadata related to AMDGPU kernels, for more information
96+
/// about the metadata and its meaning see:
97+
/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
98+
struct AMDGPUKernelMetaData {
99+
/// Constant indicating that a value is invalid.
100+
static constexpr uint32_t KInvalidValue =
101+
std::numeric_limits<uint32_t>::max();
102+
/// The amount of group segment memory required by a work-group in bytes.
103+
uint32_t GroupSegmentList = KInvalidValue;
104+
/// The amount of fixed private address space memory required for a work-item
105+
/// in bytes.
106+
uint32_t PrivateSegmentSize = KInvalidValue;
107+
/// Number of scalar registers required by a wavefront.
108+
uint32_t SGPRCount = KInvalidValue;
109+
/// Number of vector registers required by each work-item.
110+
uint32_t VGPRCount = KInvalidValue;
111+
/// Number of stores from a scalar register to a register allocator created
112+
/// spill location.
113+
uint32_t SGPRSpillCount = KInvalidValue;
114+
/// Number of stores from a vector register to a register allocator created
115+
/// spill location.
116+
uint32_t VGPRSpillCount = KInvalidValue;
117+
/// Number of accumulator registers required by each work-item.
118+
uint32_t AGPRCount = KInvalidValue;
119+
/// Corresponds to the OpenCL reqd_work_group_size attribute.
120+
uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
121+
KInvalidValue};
122+
/// Corresponds to the OpenCL work_group_size_hint attribute.
123+
uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
124+
/// Wavefront size.
125+
uint32_t WavefrontSize = KInvalidValue;
126+
/// Maximum flat work-group size supported by the kernel in work-items.
127+
uint32_t MaxFlatWorkgroupSize = KInvalidValue;
128+
};
129+
130+
/// Reads AMDGPU specific metadata from the ELF file and propagates the
131+
/// KernelInfoMap.
132+
Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
133+
StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
134+
uint16_t &ELFABIVersion);
135+
} // namespace amdgpu
76136
} // namespace offloading
77137
} // namespace llvm
78138

llvm/lib/Frontend/Offloading/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading
1111
LINK_COMPONENTS
1212
Core
1313
BinaryFormat
14+
Object
1415
Support
1516
TransformUtils
1617
TargetParser

llvm/lib/Frontend/Offloading/Utility.cpp

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,16 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "llvm/Frontend/Offloading/Utility.h"
10+
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
11+
#include "llvm/BinaryFormat/ELF.h"
12+
#include "llvm/BinaryFormat/MsgPackDocument.h"
1013
#include "llvm/IR/Constants.h"
1114
#include "llvm/IR/GlobalValue.h"
1215
#include "llvm/IR/GlobalVariable.h"
1316
#include "llvm/IR/Value.h"
17+
#include "llvm/Object/ELFObjectFile.h"
18+
#include "llvm/Support/MemoryBufferRef.h"
19+
#include "llvm/Support/YAMLTraits.h"
1420
#include "llvm/Transforms/Utils/ModuleUtils.h"
1521

1622
using namespace llvm;
@@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
126132

127133
return std::make_pair(EntriesB, EntriesE);
128134
}
135+
136+
bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
137+
uint32_t ImageFlags,
138+
StringRef EnvTargetID) {
139+
using namespace llvm::ELF;
140+
StringRef EnvArch = EnvTargetID.split(":").first;
141+
142+
// Trivial check if the base processors match.
143+
if (EnvArch != ImageArch)
144+
return false;
145+
146+
// Check if the image is requesting xnack on or off.
147+
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
148+
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
149+
// The image is 'xnack-' so the environment must be 'xnack-'.
150+
if (!EnvTargetID.contains("xnack-"))
151+
return false;
152+
break;
153+
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
154+
// The image is 'xnack+' so the environment must be 'xnack+'.
155+
if (!EnvTargetID.contains("xnack+"))
156+
return false;
157+
break;
158+
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
159+
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
160+
default:
161+
break;
162+
}
163+
164+
// Check if the image is requesting sramecc on or off.
165+
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
166+
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
167+
// The image is 'sramecc-' so the environment must be 'sramecc-'.
168+
if (!EnvTargetID.contains("sramecc-"))
169+
return false;
170+
break;
171+
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
172+
// The image is 'sramecc+' so the environment must be 'sramecc+'.
173+
if (!EnvTargetID.contains("sramecc+"))
174+
return false;
175+
break;
176+
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
177+
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
178+
break;
179+
}
180+
181+
return true;
182+
}
183+
184+
namespace {
185+
/// Reads the AMDGPU specific per-kernel-metadata from an image.
186+
class KernelInfoReader {
187+
public:
188+
KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
189+
: KernelInfoMap(KIM) {}
190+
191+
/// Process ELF note to read AMDGPU metadata from respective information
192+
/// fields.
193+
Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
194+
if (Note.getName() != "AMDGPU")
195+
return Error::success(); // We are not interested in other things
196+
197+
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
198+
"Parse AMDGPU MetaData");
199+
auto Desc = Note.getDesc(Align);
200+
StringRef MsgPackString =
201+
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
202+
msgpack::Document MsgPackDoc;
203+
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
204+
return Error::success();
205+
206+
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
207+
if (!Verifier.verify(MsgPackDoc.getRoot()))
208+
return Error::success();
209+
210+
auto RootMap = MsgPackDoc.getRoot().getMap(true);
211+
212+
if (auto Err = iterateAMDKernels(RootMap))
213+
return Err;
214+
215+
return Error::success();
216+
}
217+
218+
private:
219+
/// Extracts the relevant information via simple string look-up in the msgpack
220+
/// document elements.
221+
Error
222+
extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
223+
std::string &KernelName,
224+
offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
225+
if (!V.first.isString())
226+
return Error::success();
227+
228+
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
229+
return DK.getString() == SK;
230+
};
231+
232+
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
233+
uint32_t *Vals) {
234+
assert(DN.isArray() && "MsgPack DocNode is an array node");
235+
auto DNA = DN.getArray();
236+
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
237+
238+
int I = 0;
239+
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
240+
++DNABegin) {
241+
Vals[I++] = DNABegin->getUInt();
242+
}
243+
};
244+
245+
if (IsKey(V.first, ".name")) {
246+
KernelName = V.second.toString();
247+
} else if (IsKey(V.first, ".sgpr_count")) {
248+
KernelData.SGPRCount = V.second.getUInt();
249+
} else if (IsKey(V.first, ".sgpr_spill_count")) {
250+
KernelData.SGPRSpillCount = V.second.getUInt();
251+
} else if (IsKey(V.first, ".vgpr_count")) {
252+
KernelData.VGPRCount = V.second.getUInt();
253+
} else if (IsKey(V.first, ".vgpr_spill_count")) {
254+
KernelData.VGPRSpillCount = V.second.getUInt();
255+
} else if (IsKey(V.first, ".agpr_count")) {
256+
KernelData.AGPRCount = V.second.getUInt();
257+
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
258+
KernelData.PrivateSegmentSize = V.second.getUInt();
259+
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
260+
KernelData.GroupSegmentList = V.second.getUInt();
261+
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
262+
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
263+
} else if (IsKey(V.first, ".workgroup_size_hint")) {
264+
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
265+
} else if (IsKey(V.first, ".wavefront_size")) {
266+
KernelData.WavefrontSize = V.second.getUInt();
267+
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
268+
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
269+
}
270+
271+
return Error::success();
272+
}
273+
274+
/// Get the "amdhsa.kernels" element from the msgpack Document
275+
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
276+
auto Res = MDN.find("amdhsa.kernels");
277+
if (Res == MDN.end())
278+
return createStringError(inconvertibleErrorCode(),
279+
"Could not find amdhsa.kernels key");
280+
281+
auto Pair = *Res;
282+
assert(Pair.second.isArray() &&
283+
"AMDGPU kernel entries are arrays of entries");
284+
285+
return Pair.second.getArray();
286+
}
287+
288+
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
289+
/// MapDocNode that either maps a string to a single value (most of them) or
290+
/// to another array of things. Currently, we only handle the case that maps
291+
/// to scalar value.
292+
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
293+
offloading::amdgpu::AMDGPUKernelMetaData KernelData;
294+
std::string KernelName;
295+
auto Entry = (*It).getMap();
296+
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
297+
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
298+
return Err;
299+
300+
KernelInfoMap.insert({KernelName, KernelData});
301+
return Error::success();
302+
}
303+
304+
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
305+
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
306+
auto KernelsOrErr = getAMDKernelsArray(MDN);
307+
if (auto Err = KernelsOrErr.takeError())
308+
return Err;
309+
310+
auto KernelsArr = *KernelsOrErr;
311+
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
312+
if (!It->isMap())
313+
continue; // we expect <key,value> pairs
314+
315+
// Obtain the value for the different entries. Each array entry is a
316+
// MapDocNode
317+
if (auto Err = generateKernelInfo(It))
318+
return Err;
319+
}
320+
return Error::success();
321+
}
322+
323+
// Kernel names are the keys
324+
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
325+
};
326+
} // namespace
327+
328+
Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
329+
MemoryBufferRef MemBuffer,
330+
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
331+
uint16_t &ELFABIVersion) {
332+
Error Err = Error::success(); // Used later as out-parameter
333+
334+
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
335+
if (auto Err = ELFOrError.takeError())
336+
return Err;
337+
338+
const object::ELF64LEFile ELFObj = ELFOrError.get();
339+
Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
340+
if (!Sections)
341+
return Sections.takeError();
342+
KernelInfoReader Reader(KernelInfoMap);
343+
344+
// Read the code object version from ELF image header
345+
auto Header = ELFObj.getHeader();
346+
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
347+
for (const auto &S : *Sections) {
348+
if (S.sh_type != ELF::SHT_NOTE)
349+
continue;
350+
351+
for (const auto N : ELFObj.notes(S, Err)) {
352+
if (Err)
353+
return Err;
354+
// Fills the KernelInfoTabel entries in the reader
355+
if ((Err = Reader.processNote(N, S.sh_addralign)))
356+
return Err;
357+
}
358+
}
359+
return Error::success();
360+
}

offload/plugins-nextgen/amdgpu/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
1010

1111
if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
1212
message(STATUS "Building AMDGPU plugin linked against libhsa")
13-
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
13+
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading)
1414
else()
1515
message(STATUS "Building AMDGPU plugin for dlopened libhsa")
1616
target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
1717
target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
18+
target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading)
1819
endif()
1920

2021
# Configure testing for the AMDGPU plugin. We will build tests if we could a

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
485485
findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
486486

487487
/// Get additional info for kernel, e.g., register spill counts
488-
std::optional<utils::KernelMetaDataTy>
488+
std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
489489
getKernelInfo(StringRef Identifier) const {
490490
auto It = KernelInfoMap.find(Identifier);
491491

@@ -499,7 +499,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
499499
/// The exectuable loaded on the agent.
500500
hsa_executable_t Executable;
501501
hsa_code_object_t CodeObject;
502-
StringMap<utils::KernelMetaDataTy> KernelInfoMap;
502+
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
503503
uint16_t ELFABIVersion;
504504
};
505505

@@ -600,7 +600,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
600600
uint32_t ImplicitArgsSize;
601601

602602
/// Additional Info for the AMD GPU Kernel
603-
std::optional<utils::KernelMetaDataTy> KernelInfo;
603+
std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
604604
};
605605

606606
/// Class representing an HSA signal. Signals are used to define dependencies
@@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
31883188
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
31893189
if (!TargeTripleAndFeaturesOrError)
31903190
return TargeTripleAndFeaturesOrError.takeError();
3191-
return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
3192-
ElfOrErr->getPlatformFlags(),
3193-
*TargeTripleAndFeaturesOrError);
3191+
return offloading::amdgpu::isImageCompatibleWithEnv(
3192+
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
3193+
*TargeTripleAndFeaturesOrError);
31943194
}
31953195

31963196
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {

0 commit comments

Comments
 (0)