|
7 | 7 | //===----------------------------------------------------------------------===//
|
8 | 8 |
|
9 | 9 | #include "llvm/Frontend/Offloading/Utility.h"
|
| 10 | +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" |
| 11 | +#include "llvm/BinaryFormat/ELF.h" |
| 12 | +#include "llvm/BinaryFormat/MsgPackDocument.h" |
10 | 13 | #include "llvm/IR/Constants.h"
|
11 | 14 | #include "llvm/IR/GlobalValue.h"
|
12 | 15 | #include "llvm/IR/GlobalVariable.h"
|
13 | 16 | #include "llvm/IR/Value.h"
|
| 17 | +#include "llvm/Object/ELFObjectFile.h" |
| 18 | +#include "llvm/Support/MemoryBufferRef.h" |
| 19 | +#include "llvm/Support/YAMLTraits.h" |
14 | 20 | #include "llvm/Transforms/Utils/ModuleUtils.h"
|
15 | 21 |
|
16 | 22 | using namespace llvm;
|
@@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
|
126 | 132 |
|
127 | 133 | return std::make_pair(EntriesB, EntriesE);
|
128 | 134 | }
|
| 135 | + |
| 136 | +bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch, |
| 137 | + uint32_t ImageFlags, |
| 138 | + StringRef EnvTargetID) { |
| 139 | + using namespace llvm::ELF; |
| 140 | + StringRef EnvArch = EnvTargetID.split(":").first; |
| 141 | + |
| 142 | + // Trivial check if the base processors match. |
| 143 | + if (EnvArch != ImageArch) |
| 144 | + return false; |
| 145 | + |
| 146 | + // Check if the image is requesting xnack on or off. |
| 147 | + switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) { |
| 148 | + case EF_AMDGPU_FEATURE_XNACK_OFF_V4: |
| 149 | + // The image is 'xnack-' so the environment must be 'xnack-'. |
| 150 | + if (!EnvTargetID.contains("xnack-")) |
| 151 | + return false; |
| 152 | + break; |
| 153 | + case EF_AMDGPU_FEATURE_XNACK_ON_V4: |
| 154 | + // The image is 'xnack+' so the environment must be 'xnack+'. |
| 155 | + if (!EnvTargetID.contains("xnack+")) |
| 156 | + return false; |
| 157 | + break; |
| 158 | + case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4: |
| 159 | + case EF_AMDGPU_FEATURE_XNACK_ANY_V4: |
| 160 | + default: |
| 161 | + break; |
| 162 | + } |
| 163 | + |
| 164 | + // Check if the image is requesting sramecc on or off. |
| 165 | + switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) { |
| 166 | + case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4: |
| 167 | + // The image is 'sramecc-' so the environment must be 'sramecc-'. |
| 168 | + if (!EnvTargetID.contains("sramecc-")) |
| 169 | + return false; |
| 170 | + break; |
| 171 | + case EF_AMDGPU_FEATURE_SRAMECC_ON_V4: |
| 172 | + // The image is 'sramecc+' so the environment must be 'sramecc+'. |
| 173 | + if (!EnvTargetID.contains("sramecc+")) |
| 174 | + return false; |
| 175 | + break; |
| 176 | + case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4: |
| 177 | + case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4: |
| 178 | + break; |
| 179 | + } |
| 180 | + |
| 181 | + return true; |
| 182 | +} |
| 183 | + |
| 184 | +namespace { |
| 185 | +/// Reads the AMDGPU specific per-kernel-metadata from an image. |
| 186 | +class KernelInfoReader { |
| 187 | +public: |
| 188 | + KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM) |
| 189 | + : KernelInfoMap(KIM) {} |
| 190 | + |
| 191 | + /// Process ELF note to read AMDGPU metadata from respective information |
| 192 | + /// fields. |
| 193 | + Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) { |
| 194 | + if (Note.getName() != "AMDGPU") |
| 195 | + return Error::success(); // We are not interested in other things |
| 196 | + |
| 197 | + assert(Note.getType() == ELF::NT_AMDGPU_METADATA && |
| 198 | + "Parse AMDGPU MetaData"); |
| 199 | + auto Desc = Note.getDesc(Align); |
| 200 | + StringRef MsgPackString = |
| 201 | + StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size()); |
| 202 | + msgpack::Document MsgPackDoc; |
| 203 | + if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false)) |
| 204 | + return Error::success(); |
| 205 | + |
| 206 | + AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); |
| 207 | + if (!Verifier.verify(MsgPackDoc.getRoot())) |
| 208 | + return Error::success(); |
| 209 | + |
| 210 | + auto RootMap = MsgPackDoc.getRoot().getMap(true); |
| 211 | + |
| 212 | + if (auto Err = iterateAMDKernels(RootMap)) |
| 213 | + return Err; |
| 214 | + |
| 215 | + return Error::success(); |
| 216 | + } |
| 217 | + |
| 218 | +private: |
| 219 | + /// Extracts the relevant information via simple string look-up in the msgpack |
| 220 | + /// document elements. |
| 221 | + Error |
| 222 | + extractKernelData(msgpack::MapDocNode::MapTy::value_type V, |
| 223 | + std::string &KernelName, |
| 224 | + offloading::amdgpu::AMDGPUKernelMetaData &KernelData) { |
| 225 | + if (!V.first.isString()) |
| 226 | + return Error::success(); |
| 227 | + |
| 228 | + const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) { |
| 229 | + return DK.getString() == SK; |
| 230 | + }; |
| 231 | + |
| 232 | + const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN, |
| 233 | + uint32_t *Vals) { |
| 234 | + assert(DN.isArray() && "MsgPack DocNode is an array node"); |
| 235 | + auto DNA = DN.getArray(); |
| 236 | + assert(DNA.size() == 3 && "ArrayNode has at most three elements"); |
| 237 | + |
| 238 | + int I = 0; |
| 239 | + for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd; |
| 240 | + ++DNABegin) { |
| 241 | + Vals[I++] = DNABegin->getUInt(); |
| 242 | + } |
| 243 | + }; |
| 244 | + |
| 245 | + if (IsKey(V.first, ".name")) { |
| 246 | + KernelName = V.second.toString(); |
| 247 | + } else if (IsKey(V.first, ".sgpr_count")) { |
| 248 | + KernelData.SGPRCount = V.second.getUInt(); |
| 249 | + } else if (IsKey(V.first, ".sgpr_spill_count")) { |
| 250 | + KernelData.SGPRSpillCount = V.second.getUInt(); |
| 251 | + } else if (IsKey(V.first, ".vgpr_count")) { |
| 252 | + KernelData.VGPRCount = V.second.getUInt(); |
| 253 | + } else if (IsKey(V.first, ".vgpr_spill_count")) { |
| 254 | + KernelData.VGPRSpillCount = V.second.getUInt(); |
| 255 | + } else if (IsKey(V.first, ".agpr_count")) { |
| 256 | + KernelData.AGPRCount = V.second.getUInt(); |
| 257 | + } else if (IsKey(V.first, ".private_segment_fixed_size")) { |
| 258 | + KernelData.PrivateSegmentSize = V.second.getUInt(); |
| 259 | + } else if (IsKey(V.first, ".group_segment_fixed_size")) { |
| 260 | + KernelData.GroupSegmentList = V.second.getUInt(); |
| 261 | + } else if (IsKey(V.first, ".reqd_workgroup_size")) { |
| 262 | + GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize); |
| 263 | + } else if (IsKey(V.first, ".workgroup_size_hint")) { |
| 264 | + GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint); |
| 265 | + } else if (IsKey(V.first, ".wavefront_size")) { |
| 266 | + KernelData.WavefrontSize = V.second.getUInt(); |
| 267 | + } else if (IsKey(V.first, ".max_flat_workgroup_size")) { |
| 268 | + KernelData.MaxFlatWorkgroupSize = V.second.getUInt(); |
| 269 | + } |
| 270 | + |
| 271 | + return Error::success(); |
| 272 | + } |
| 273 | + |
| 274 | + /// Get the "amdhsa.kernels" element from the msgpack Document |
| 275 | + Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) { |
| 276 | + auto Res = MDN.find("amdhsa.kernels"); |
| 277 | + if (Res == MDN.end()) |
| 278 | + return createStringError(inconvertibleErrorCode(), |
| 279 | + "Could not find amdhsa.kernels key"); |
| 280 | + |
| 281 | + auto Pair = *Res; |
| 282 | + assert(Pair.second.isArray() && |
| 283 | + "AMDGPU kernel entries are arrays of entries"); |
| 284 | + |
| 285 | + return Pair.second.getArray(); |
| 286 | + } |
| 287 | + |
| 288 | + /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a |
| 289 | + /// MapDocNode that either maps a string to a single value (most of them) or |
| 290 | + /// to another array of things. Currently, we only handle the case that maps |
| 291 | + /// to scalar value. |
| 292 | + Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) { |
| 293 | + offloading::amdgpu::AMDGPUKernelMetaData KernelData; |
| 294 | + std::string KernelName; |
| 295 | + auto Entry = (*It).getMap(); |
| 296 | + for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI) |
| 297 | + if (auto Err = extractKernelData(*MI, KernelName, KernelData)) |
| 298 | + return Err; |
| 299 | + |
| 300 | + KernelInfoMap.insert({KernelName, KernelData}); |
| 301 | + return Error::success(); |
| 302 | + } |
| 303 | + |
| 304 | + /// Go over the list of AMD kernels in the "amdhsa.kernels" entry |
| 305 | + Error iterateAMDKernels(msgpack::MapDocNode &MDN) { |
| 306 | + auto KernelsOrErr = getAMDKernelsArray(MDN); |
| 307 | + if (auto Err = KernelsOrErr.takeError()) |
| 308 | + return Err; |
| 309 | + |
| 310 | + auto KernelsArr = *KernelsOrErr; |
| 311 | + for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) { |
| 312 | + if (!It->isMap()) |
| 313 | + continue; // we expect <key,value> pairs |
| 314 | + |
| 315 | + // Obtain the value for the different entries. Each array entry is a |
| 316 | + // MapDocNode |
| 317 | + if (auto Err = generateKernelInfo(It)) |
| 318 | + return Err; |
| 319 | + } |
| 320 | + return Error::success(); |
| 321 | + } |
| 322 | + |
| 323 | + // Kernel names are the keys |
| 324 | + StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap; |
| 325 | +}; |
| 326 | +} // namespace |
| 327 | + |
| 328 | +Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( |
| 329 | + MemoryBufferRef MemBuffer, |
| 330 | + StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap, |
| 331 | + uint16_t &ELFABIVersion) { |
| 332 | + Error Err = Error::success(); // Used later as out-parameter |
| 333 | + |
| 334 | + auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer()); |
| 335 | + if (auto Err = ELFOrError.takeError()) |
| 336 | + return Err; |
| 337 | + |
| 338 | + const object::ELF64LEFile ELFObj = ELFOrError.get(); |
| 339 | + Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections(); |
| 340 | + if (!Sections) |
| 341 | + return Sections.takeError(); |
| 342 | + KernelInfoReader Reader(KernelInfoMap); |
| 343 | + |
| 344 | + // Read the code object version from ELF image header |
| 345 | + auto Header = ELFObj.getHeader(); |
| 346 | + ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]); |
| 347 | + for (const auto &S : *Sections) { |
| 348 | + if (S.sh_type != ELF::SHT_NOTE) |
| 349 | + continue; |
| 350 | + |
| 351 | + for (const auto N : ELFObj.notes(S, Err)) { |
| 352 | + if (Err) |
| 353 | + return Err; |
| 354 | + // Fills the KernelInfoTabel entries in the reader |
| 355 | + if ((Err = Reader.processNote(N, S.sh_addralign))) |
| 356 | + return Err; |
| 357 | + } |
| 358 | + } |
| 359 | + return Error::success(); |
| 360 | +} |
0 commit comments