235#include "llvm/IR/IntrinsicsAMDGPU.h"
250#define DEBUG_TYPE "amdgpu-lower-buffer-fat-pointers"
273 void clear() { Map.clear(); }
279class BufferFatPtrToIntTypeMap :
public BufferFatPtrTypeLoweringBase {
280 using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
290class BufferFatPtrToStructTypeMap :
public BufferFatPtrTypeLoweringBase {
291 using BufferFatPtrTypeLoweringBase::BufferFatPtrTypeLoweringBase;
300Type *BufferFatPtrTypeLoweringBase::remapTypeImpl(
305 if (
auto *PT = dyn_cast<PointerType>(Ty)) {
307 return *
Entry = remapScalar(PT);
310 if (
auto *VT = dyn_cast<VectorType>(Ty)) {
311 auto *PT = dyn_cast<PointerType>(VT->getElementType());
313 return *
Entry = remapVector(VT);
320 StructType *TyAsStruct = dyn_cast<StructType>(Ty);
321 bool IsUniqued = !TyAsStruct || TyAsStruct->
isLiteral();
328 if (!Seen.
insert(TyAsStruct).second) {
330 return *
Entry = Placeholder;
333 bool Changed =
false;
337 Type *NewElem = remapTypeImpl(OldElem, Seen);
338 ElementTypes[
I] = NewElem;
339 Changed |= (OldElem != NewElem);
346 if (
auto *ArrTy = dyn_cast<ArrayType>(Ty))
347 return *Entry = ArrayType::get(ElementTypes[0], ArrTy->getNumElements());
348 if (
auto *FnTy = dyn_cast<FunctionType>(Ty))
349 return *Entry = FunctionType::get(ElementTypes[0],
352 if (
auto *STy = dyn_cast<StructType>(Ty)) {
361 Type **RecursionEntry = &
Map[Ty];
362 if (*RecursionEntry) {
363 auto *Placeholder = cast<StructType>(*RecursionEntry);
364 Placeholder->setBody(ElementTypes, IsPacked);
365 Placeholder->setName(
Name);
366 return *
Entry = Placeholder;
374Type *BufferFatPtrTypeLoweringBase::remapType(
Type *SrcTy) {
376 return remapTypeImpl(SrcTy, Visited);
403 auto *ST = dyn_cast<StructType>(Ty);
406 if (!ST->isLiteral() || ST->getNumElements() != 2)
409 dyn_cast<PointerType>(ST->getElementType(0)->getScalarType());
411 dyn_cast<IntegerType>(ST->getElementType(1)->getScalarType());
412 return MaybeRsrc && MaybeOff &&
421 return isBufferFatPtrOrVector(U.get()->getType());
434class StoreFatPtrsAsIntsVisitor
435 :
public InstVisitor<StoreFatPtrsAsIntsVisitor, bool> {
436 BufferFatPtrToIntTypeMap *TypeMap;
451 StoreFatPtrsAsIntsVisitor(BufferFatPtrToIntTypeMap *TypeMap,
LLVMContext &Ctx)
452 : TypeMap(TypeMap), IRB(Ctx) {}
468 if (
Find != ConvertedForStore.end())
471 Value *Cast = IRB.CreatePtrToInt(V, To,
Name +
".int");
472 ConvertedForStore[
V] = Cast;
475 if (
From->getNumContainedTypes() == 0)
479 if (
auto *AT = dyn_cast<ArrayType>(
From)) {
481 Type *ToPart = cast<ArrayType>(To)->getElementType();
482 for (
uint64_t I = 0, E = AT->getArrayNumElements();
I < E; ++
I) {
486 Ret = IRB.CreateInsertValue(Ret, NewField,
I);
489 for (
auto [
Idx, FromPart, ToPart] :
494 Ret = IRB.CreateInsertValue(Ret, NewField,
Idx);
497 ConvertedForStore[
V] =
Ret;
506 Value *Cast = IRB.CreateIntToPtr(V, To,
Name +
".ptr");
509 if (
From->getNumContainedTypes() == 0)
513 if (
auto *AT = dyn_cast<ArrayType>(
From)) {
515 Type *ToPart = cast<ArrayType>(To)->getElementType();
516 for (
uint64_t I = 0, E = AT->getArrayNumElements();
I < E; ++
I) {
520 Ret = IRB.CreateInsertValue(Ret, NewField,
I);
523 for (
auto [
Idx, FromPart, ToPart] :
528 Ret = IRB.CreateInsertValue(Ret, NewField,
Idx);
534bool StoreFatPtrsAsIntsVisitor::processFunction(
Function &
F) {
535 bool Changed =
false;
541 ConvertedForStore.clear();
545bool StoreFatPtrsAsIntsVisitor::visitAllocaInst(
AllocaInst &
I) {
546 Type *Ty =
I.getAllocatedType();
547 Type *NewTy = TypeMap->remapType(Ty);
550 I.setAllocatedType(NewTy);
555 Type *Ty =
I.getSourceElementType();
556 Type *NewTy = TypeMap->remapType(Ty);
561 I.setSourceElementType(NewTy);
562 I.setResultElementType(TypeMap->remapType(
I.getResultElementType()));
566bool StoreFatPtrsAsIntsVisitor::visitLoadInst(
LoadInst &LI) {
568 Type *IntTy = TypeMap->remapType(Ty);
572 IRB.SetInsertPoint(&LI);
573 auto *NLI = cast<LoadInst>(LI.
clone());
574 NLI->mutateType(IntTy);
575 NLI = IRB.Insert(NLI);
578 Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName());
584bool StoreFatPtrsAsIntsVisitor::visitStoreInst(
StoreInst &SI) {
586 Type *Ty =
V->getType();
587 Type *IntTy = TypeMap->remapType(Ty);
591 IRB.SetInsertPoint(&SI);
592 Value *IntV = fatPtrsToInts(V, Ty, IntTy,
V->getName());
596 SI.setOperand(0, IntV);
616class LegalizeBufferContentTypesVisitor
617 :
public InstVisitor<LegalizeBufferContentTypesVisitor, bool> {
626 Type *scalarArrayTypeAsVector(
Type *MaybeArrayType);
663 Type *intrinsicTypeFor(
Type *LegalType);
669 std::pair<bool, bool> visitStoreImpl(
StoreInst &OrigSI,
Type *PartType,
680 : IRB(Ctx),
DL(
DL) {}
685Type *LegalizeBufferContentTypesVisitor::scalarArrayTypeAsVector(
Type *
T) {
689 Type *ET = AT->getElementType();
692 "should have recursed");
693 if (!
DL.typeSizeEqualsStoreSize(AT))
695 "loading padded arrays from buffer fat pinters should have recursed");
699Value *LegalizeBufferContentTypesVisitor::arrayToVector(
Value *V,
703 auto *VT = cast<FixedVectorType>(TargetType);
704 unsigned EC = VT->getNumElements();
707 VectorRes = IRB.CreateInsertElement(VectorRes, Elem,
I,
713Value *LegalizeBufferContentTypesVisitor::vectorToArray(
Value *V,
717 ArrayType *AT = cast<ArrayType>(OrigType);
718 unsigned EC = AT->getNumElements();
721 ArrayRes = IRB.CreateInsertValue(ArrayRes, Elem,
I,
727Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(
Type *
T) {
730 if (!
DL.typeSizeEqualsStoreSize(
T))
731 T = IRB.getIntNTy(
Size.getFixedValue());
732 Type *ElemTy =
T->getScalarType();
733 if (isa<PointerType, ScalableVectorType>(ElemTy)) {
738 unsigned ElemSize =
DL.getTypeSizeInBits(ElemTy).getFixedValue();
739 if (
isPowerOf2_32(ElemSize) && ElemSize >= 16 && ElemSize <= 128) {
744 Type *BestVectorElemType =
nullptr;
745 if (
Size.isKnownMultipleOf(32))
747 else if (
Size.isKnownMultipleOf(16))
751 unsigned NumCastElems =
753 if (NumCastElems == 1)
754 return BestVectorElemType;
758Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate(
760 Type *SourceType =
V->getType();
761 TypeSize SourceSize =
DL.getTypeSizeInBits(SourceType);
762 TypeSize TargetSize =
DL.getTypeSizeInBits(TargetType);
763 if (SourceSize != TargetSize) {
766 Value *AsScalar = IRB.CreateBitCast(V, ShortScalarTy,
Name +
".as.scalar");
767 Value *Zext = IRB.CreateZExt(AsScalar, ByteScalarTy,
Name +
".zext");
769 SourceType = ByteScalarTy;
771 return IRB.CreateBitCast(V, TargetType,
Name +
".legal");
774Value *LegalizeBufferContentTypesVisitor::makeIllegalNonAggregate(
776 Type *LegalType =
V->getType();
777 TypeSize LegalSize =
DL.getTypeSizeInBits(LegalType);
778 TypeSize OrigSize =
DL.getTypeSizeInBits(OrigType);
779 if (LegalSize != OrigSize) {
782 Value *AsScalar = IRB.CreateBitCast(V, ByteScalarTy,
Name +
".bytes.cast");
783 Value *Trunc = IRB.CreateTrunc(AsScalar, ShortScalarTy,
Name +
".trunc");
784 return IRB.CreateBitCast(Trunc, OrigType,
Name +
".orig");
786 return IRB.CreateBitCast(V, OrigType,
Name +
".real.ty");
789Type *LegalizeBufferContentTypesVisitor::intrinsicTypeFor(
Type *LegalType) {
790 auto *VT = dyn_cast<FixedVectorType>(LegalType);
793 Type *ET = VT->getElementType();
796 if (VT->getNumElements() == 1)
798 if (
DL.getTypeSizeInBits(LegalType) == 96 &&
DL.getTypeSizeInBits(ET) < 32)
801 switch (VT->getNumElements()) {
819void LegalizeBufferContentTypesVisitor::getVecSlices(
822 auto *VT = dyn_cast<FixedVectorType>(
T);
827 DL.getTypeSizeInBits(VT->getElementType()).getFixedValue();
829 uint64_t ElemsPer4Words = 128 / ElemBitWidth;
830 uint64_t ElemsPer2Words = ElemsPer4Words / 2;
831 uint64_t ElemsPerWord = ElemsPer2Words / 2;
832 uint64_t ElemsPerShort = ElemsPerWord / 2;
833 uint64_t ElemsPerByte = ElemsPerShort / 2;
837 uint64_t ElemsPer3Words = ElemsPerWord * 3;
839 uint64_t TotalElems = VT->getNumElements();
841 auto TrySlice = [&](
unsigned MaybeLen) {
842 if (MaybeLen > 0 && Index + MaybeLen <= TotalElems) {
843 VecSlice Slice{
Index, MaybeLen};
850 while (Index < TotalElems) {
851 TrySlice(ElemsPer4Words) || TrySlice(ElemsPer3Words) ||
852 TrySlice(ElemsPer2Words) || TrySlice(ElemsPerWord) ||
853 TrySlice(ElemsPerShort) || TrySlice(ElemsPerByte);
857Value *LegalizeBufferContentTypesVisitor::extractSlice(
Value *Vec, VecSlice S,
859 auto *VecVT = dyn_cast<FixedVectorType>(Vec->
getType());
862 if (S.Length == VecVT->getNumElements() && S.Index == 0)
865 return IRB.CreateExtractElement(Vec, S.Index,
869 return IRB.CreateShuffleVector(Vec, Mask,
Name +
".slice." +
Twine(S.Index));
872Value *LegalizeBufferContentTypesVisitor::insertSlice(
Value *Whole,
Value *Part,
875 auto *WholeVT = dyn_cast<FixedVectorType>(Whole->
getType());
878 if (S.Length == WholeVT->getNumElements() && S.Index == 0)
881 return IRB.CreateInsertElement(Whole, Part, S.Index,
884 int NumElems = cast<FixedVectorType>(Whole->
getType())->getNumElements();
892 Value *ExtPart = IRB.CreateShuffleVector(Part, ExtPartMask,
900 return IRB.CreateShuffleVector(Whole, ExtPart, Mask,
904bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
907 if (
auto *ST = dyn_cast<StructType>(PartType)) {
909 bool Changed =
false;
910 for (
auto [
I, ElemTy,
Offset] :
913 Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
914 AggByteOff +
Offset.getFixedValue(), Result,
920 if (
auto *AT = dyn_cast<ArrayType>(PartType)) {
921 Type *ElemTy = AT->getElementType();
924 TypeSize ElemStoreSize =
DL.getTypeStoreSize(ElemTy);
925 bool Changed =
false;
929 Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs,
940 Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);
941 Type *LegalType = legalNonAggregateFor(ArrayAsVecType);
944 getVecSlices(LegalType, Slices);
945 bool HasSlices = Slices.
size() > 1;
946 bool IsAggPart = !AggIdxs.
empty();
948 if (!HasSlices && !IsAggPart) {
949 Type *LoadableType = intrinsicTypeFor(LegalType);
950 if (LoadableType == PartType)
953 IRB.SetInsertPoint(&OrigLI);
954 auto *NLI = cast<LoadInst>(OrigLI.
clone());
955 NLI->mutateType(LoadableType);
956 NLI = IRB.Insert(NLI);
957 NLI->setName(
Name +
".loadable");
959 LoadsRes = IRB.CreateBitCast(NLI, LegalType,
Name +
".from.loadable");
961 IRB.SetInsertPoint(&OrigLI);
969 unsigned ElemBytes =
DL.getTypeStoreSize(ElemType);
971 if (IsAggPart && Slices.
empty())
973 for (VecSlice S : Slices) {
976 int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;
978 Value *NewPtr = IRB.CreateGEP(
982 Type *LoadableType = intrinsicTypeFor(SliceType);
983 LoadInst *NewLI = IRB.CreateAlignedLoad(
991 Value *Loaded = IRB.CreateBitCast(NewLI, SliceType,
992 NewLI->
getName() +
".from.loadable");
993 LoadsRes = insertSlice(LoadsRes, Loaded, S,
Name);
996 if (LegalType != ArrayAsVecType)
997 LoadsRes = makeIllegalNonAggregate(LoadsRes, ArrayAsVecType,
Name);
998 if (ArrayAsVecType != PartType)
999 LoadsRes = vectorToArray(LoadsRes, PartType,
Name);
1002 Result = IRB.CreateInsertValue(Result, LoadsRes, AggIdxs,
Name);
1008bool LegalizeBufferContentTypesVisitor::visitLoadInst(
LoadInst &LI) {
1015 bool Changed = visitLoadImpl(LI, OrigType, AggIdxs, 0, Result, LI.
getName());
1024std::pair<bool, bool> LegalizeBufferContentTypesVisitor::visitStoreImpl(
1027 if (
auto *ST = dyn_cast<StructType>(PartType)) {
1029 bool Changed =
false;
1030 for (
auto [
I, ElemTy,
Offset] :
1033 Changed |= std::get<0>(visitStoreImpl(OrigSI, ElemTy, AggIdxs,
1034 AggByteOff +
Offset.getFixedValue(),
1038 return std::make_pair(Changed,
false);
1040 if (
auto *AT = dyn_cast<ArrayType>(PartType)) {
1041 Type *ElemTy = AT->getElementType();
1044 TypeSize ElemStoreSize =
DL.getTypeStoreSize(ElemTy);
1045 bool Changed =
false;
1049 Changed |= std::get<0>(visitStoreImpl(
1050 OrigSI, ElemTy, AggIdxs,
1054 return std::make_pair(Changed,
false);
1059 Value *NewData = OrigData;
1061 bool IsAggPart = !AggIdxs.
empty();
1063 NewData = IRB.CreateExtractValue(NewData, AggIdxs,
Name);
1065 Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType);
1066 if (ArrayAsVecType != PartType) {
1067 NewData = arrayToVector(NewData, ArrayAsVecType,
Name);
1070 Type *LegalType = legalNonAggregateFor(ArrayAsVecType);
1071 if (LegalType != ArrayAsVecType) {
1072 NewData = makeLegalNonAggregate(NewData, LegalType,
Name);
1076 getVecSlices(LegalType, Slices);
1077 bool NeedToSplit = Slices.
size() > 1 || IsAggPart;
1079 Type *StorableType = intrinsicTypeFor(LegalType);
1080 if (StorableType == PartType)
1081 return std::make_pair(
false,
false);
1082 NewData = IRB.CreateBitCast(NewData, StorableType,
Name +
".storable");
1084 return std::make_pair(
true,
true);
1089 if (IsAggPart && Slices.
empty())
1091 unsigned ElemBytes =
DL.getTypeStoreSize(ElemType);
1093 for (VecSlice S : Slices) {
1096 int64_t ByteOffset = AggByteOff + S.Index * ElemBytes;
1098 IRB.CreateGEP(IRB.getInt8Ty(), OrigPtr, IRB.getInt32(ByteOffset),
1101 Value *DataSlice = extractSlice(NewData, S,
Name);
1102 Type *StorableType = intrinsicTypeFor(SliceType);
1103 DataSlice = IRB.CreateBitCast(DataSlice, StorableType,
1104 DataSlice->
getName() +
".storable");
1105 auto *NewSI = cast<StoreInst>(OrigSI.
clone());
1108 NewSI->setOperand(0, DataSlice);
1109 NewSI->setOperand(1, NewPtr);
1112 return std::make_pair(
true,
false);
1115bool LegalizeBufferContentTypesVisitor::visitStoreInst(
StoreInst &SI) {
1118 IRB.SetInsertPoint(&SI);
1120 Value *OrigData =
SI.getValueOperand();
1121 auto [Changed, ModifiedInPlace] =
1122 visitStoreImpl(SI, OrigData->
getType(), AggIdxs, 0, OrigData->
getName());
1123 if (Changed && !ModifiedInPlace)
1124 SI.eraseFromParent();
1128bool LegalizeBufferContentTypesVisitor::processFunction(
Function &
F) {
1129 bool Changed =
false;
1138static std::pair<Constant *, Constant *>
1141 return std::make_pair(
C->getAggregateElement(0u),
C->getAggregateElement(1u));
1147 BufferFatPtrToStructTypeMap *TypeMap;
1159 FatPtrConstMaterializer(BufferFatPtrToStructTypeMap *TypeMap,
1162 InternalMapper(UnderlyingMap,
RF_None, TypeMap, this) {}
1163 virtual ~FatPtrConstMaterializer() =
default;
1170 Type *SrcTy =
C->getType();
1171 auto *NewTy = dyn_cast<StructType>(TypeMap->remapType(SrcTy));
1172 if (
C->isNullValue())
1173 return ConstantAggregateZero::getNullValue(NewTy);
1174 if (isa<PoisonValue>(
C)) {
1179 if (isa<UndefValue>(
C)) {
1185 if (
auto *VC = dyn_cast<ConstantVector>(
C)) {
1187 Constant *NewS = InternalMapper.mapConstant(*S);
1191 auto EC =
VC->getType()->getElementCount();
1197 for (
Value *
Op :
VC->operand_values()) {
1198 auto *NewOp = dyn_cast_or_null<Constant>(InternalMapper.mapValue(*
Op));
1210 if (isa<GlobalValue>(
C))
1212 "fat pointer) values are not supported");
1214 if (isa<ConstantExpr>(
C))
1216 "fat pointer) values should have been expanded earlier");
1221Value *FatPtrConstMaterializer::materialize(
Value *V) {
1229 return materializeBufferFatPtrConst(
C);
1237class SplitPtrStructs :
public InstVisitor<SplitPtrStructs, PtrParts> {
1280 void processConditionals();
1300 : TM(TM), IRB(Ctx) {}
1328void SplitPtrStructs::copyMetadata(
Value *Dest,
Value *Src) {
1329 auto *DestI = dyn_cast<Instruction>(Dest);
1330 auto *SrcI = dyn_cast<Instruction>(Src);
1332 if (!DestI || !SrcI)
1335 DestI->copyMetadata(*SrcI);
1340 "of something that wasn't rewritten");
1341 auto *RsrcEntry = &RsrcParts[
V];
1342 auto *OffEntry = &OffParts[
V];
1343 if (*RsrcEntry && *OffEntry)
1344 return {*RsrcEntry, *OffEntry};
1346 if (
auto *
C = dyn_cast<Constant>(V)) {
1348 return {*RsrcEntry = Rsrc, *OffEntry =
Off};
1352 if (
auto *
I = dyn_cast<Instruction>(V)) {
1356 return {*RsrcEntry = Rsrc, *OffEntry =
Off};
1359 IRB.SetInsertPoint(*
I->getInsertionPointAfterDef());
1360 IRB.SetCurrentDebugLocation(
I->getDebugLoc());
1361 }
else if (
auto *
A = dyn_cast<Argument>(V)) {
1362 IRB.SetInsertPointPastAllocas(
A->getParent());
1363 IRB.SetCurrentDebugLocation(
DebugLoc());
1365 Value *Rsrc = IRB.CreateExtractValue(V, 0,
V->getName() +
".rsrc");
1366 Value *
Off = IRB.CreateExtractValue(V, 1,
V->getName() +
".off");
1367 return {*RsrcEntry = Rsrc, *OffEntry =
Off};
1379 while (
auto *
GEP = dyn_cast<GEPOperator>(V))
1380 V =
GEP->getPointerOperand();
1381 while (
auto *ASC = dyn_cast<AddrSpaceCastOperator>(V))
1382 V = ASC->getPointerOperand();
1386void SplitPtrStructs::getPossibleRsrcRoots(
Instruction *
I,
1389 if (
auto *
PHI = dyn_cast<PHINode>(
I)) {
1392 for (
Value *In :
PHI->incoming_values()) {
1395 if (isa<PHINode, SelectInst>(In))
1396 getPossibleRsrcRoots(cast<Instruction>(In), Roots, Seen);
1398 }
else if (
auto *SI = dyn_cast<SelectInst>(
I)) {
1399 if (!Seen.
insert(SI).second)
1405 if (isa<PHINode, SelectInst>(TrueVal))
1406 getPossibleRsrcRoots(cast<Instruction>(TrueVal), Roots, Seen);
1407 if (isa<PHINode, SelectInst>(FalseVal))
1408 getPossibleRsrcRoots(cast<Instruction>(FalseVal), Roots, Seen);
1414void SplitPtrStructs::processConditionals() {
1420 Value *Rsrc = RsrcParts[
I];
1422 assert(Rsrc && Off &&
"must have visited conditionals by now");
1424 std::optional<Value *> MaybeRsrc;
1425 auto MaybeFoundRsrc = FoundRsrcs.
find(
I);
1426 if (MaybeFoundRsrc != FoundRsrcs.
end()) {
1427 MaybeRsrc = MaybeFoundRsrc->second;
1432 getPossibleRsrcRoots(
I, Roots, Seen);
1435 for (
Value *V : Roots)
1437 for (
Value *V : Seen)
1449 if (Diff.size() == 1) {
1450 Value *RootVal = *Diff.begin();
1454 MaybeRsrc = std::get<0>(getPtrParts(RootVal));
1456 MaybeRsrc = RootVal;
1461 if (
auto *
PHI = dyn_cast<PHINode>(
I)) {
1464 IRB.SetInsertPoint(*
PHI->getInsertionPointAfterDef());
1465 IRB.SetCurrentDebugLocation(
PHI->getDebugLoc());
1467 NewRsrc = *MaybeRsrc;
1470 auto *RsrcPHI = IRB.CreatePHI(RsrcTy,
PHI->getNumIncomingValues());
1471 RsrcPHI->takeName(Rsrc);
1472 for (
auto [V, BB] :
llvm::zip(
PHI->incoming_values(),
PHI->blocks())) {
1473 Value *VRsrc = std::get<0>(getPtrParts(V));
1474 RsrcPHI->addIncoming(VRsrc, BB);
1476 copyMetadata(RsrcPHI,
PHI);
1481 auto *NewOff = IRB.CreatePHI(OffTy,
PHI->getNumIncomingValues());
1482 NewOff->takeName(Off);
1483 for (
auto [V, BB] :
llvm::zip(
PHI->incoming_values(),
PHI->blocks())) {
1484 assert(OffParts.count(V) &&
"An offset part had to be created by now");
1485 Value *VOff = std::get<1>(getPtrParts(V));
1486 NewOff->addIncoming(VOff, BB);
1488 copyMetadata(NewOff,
PHI);
1494 ConditionalTemps.push_back(cast<Instruction>(Rsrc));
1495 ConditionalTemps.push_back(cast<Instruction>(Off));
1497 Off->replaceAllUsesWith(NewOff);
1501 for (
Value *V : Seen)
1502 FoundRsrcs[cast<Instruction>(V)] = NewRsrc;
1503 }
else if (isa<SelectInst>(
I)) {
1505 ConditionalTemps.push_back(cast<Instruction>(Rsrc));
1507 for (
Value *V : Seen)
1508 FoundRsrcs[cast<Instruction>(V)] = *MaybeRsrc;
1516void SplitPtrStructs::killAndReplaceSplitInstructions(
1519 I->eraseFromParent();
1522 if (!SplitUsers.contains(
I))
1527 for (
auto *Dbg : Dbgs) {
1528 IRB.SetInsertPoint(Dbg);
1529 auto &
DL =
I->getDataLayout();
1531 "We should've RAUW'd away loads, stores, etc. at this point");
1532 auto *OffDbg = cast<DbgValueInst>(
Dbg->clone());
1533 copyMetadata(OffDbg, Dbg);
1534 auto [Rsrc,
Off] = getPtrParts(
I);
1536 int64_t RsrcSz =
DL.getTypeSizeInBits(Rsrc->
getType());
1537 int64_t OffSz =
DL.getTypeSizeInBits(
Off->getType());
1539 std::optional<DIExpression *> RsrcExpr =
1542 std::optional<DIExpression *> OffExpr =
1546 OffDbg->setExpression(*OffExpr);
1547 OffDbg->replaceVariableLocationOp(
I, Off);
1550 OffDbg->deleteValue();
1553 Dbg->setExpression(*RsrcExpr);
1554 Dbg->replaceVariableLocationOp(
I, Rsrc);
1561 I->replaceUsesWithIf(
Poison, [&](
const Use &U) ->
bool {
1562 if (
const auto *UI = dyn_cast<Instruction>(
U.getUser()))
1563 return SplitUsers.contains(UI);
1567 if (
I->use_empty()) {
1568 I->eraseFromParent();
1571 IRB.SetInsertPoint(*
I->getInsertionPointAfterDef());
1572 IRB.SetCurrentDebugLocation(
I->getDebugLoc());
1573 auto [Rsrc,
Off] = getPtrParts(
I);
1579 I->replaceAllUsesWith(
Struct);
1580 I->eraseFromParent();
1592 case AtomicOrdering::Release:
1593 case AtomicOrdering::AcquireRelease:
1594 case AtomicOrdering::SequentiallyConsistent:
1595 IRB.CreateFence(AtomicOrdering::Release, SSID);
1605 case AtomicOrdering::Acquire:
1606 case AtomicOrdering::AcquireRelease:
1607 case AtomicOrdering::SequentiallyConsistent:
1608 IRB.CreateFence(AtomicOrdering::Acquire, SSID);
1619 IRB.SetInsertPoint(
I);
1621 auto [Rsrc,
Off] = getPtrParts(
Ptr);
1624 Args.push_back(Arg);
1625 Args.push_back(Rsrc);
1626 Args.push_back(Off);
1627 insertPreMemOpFence(Order, SSID);
1631 Args.push_back(IRB.getInt32(0));
1636 Args.push_back(IRB.getInt32(Aux));
1639 if (isa<LoadInst>(
I))
1640 IID = Order == AtomicOrdering::NotAtomic
1641 ? Intrinsic::amdgcn_raw_ptr_buffer_load
1642 : Intrinsic::amdgcn_raw_ptr_atomic_buffer_load;
1643 else if (isa<StoreInst>(
I))
1644 IID = Intrinsic::amdgcn_raw_ptr_buffer_store;
1645 else if (
auto *RMW = dyn_cast<AtomicRMWInst>(
I)) {
1646 switch (RMW->getOperation()) {
1648 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap;
1651 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_add;
1654 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub;
1657 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_and;
1660 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_or;
1663 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor;
1666 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax;
1669 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin;
1672 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax;
1675 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin;
1678 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd;
1681 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax;
1684 IID = Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin;
1688 "buffer resources and should've been expanded away");
1693 "should've been expanded away");
1698 "buffer resources and should've ben expanded away");
1708 auto *
Call = IRB.CreateIntrinsic(IID, Ty, Args);
1709 copyMetadata(Call,
I);
1710 setAlign(Call, Alignment, Arg ? 1 : 0);
1713 insertPostMemOpFence(Order, SSID);
1716 SplitUsers.insert(
I);
1717 I->replaceAllUsesWith(Call);
1722 return {
nullptr,
nullptr};
1727 return {
nullptr,
nullptr};
1731 return {
nullptr,
nullptr};
1736 return {
nullptr,
nullptr};
1737 Value *Arg =
SI.getValueOperand();
1738 handleMemoryInst(&SI, Arg,
SI.getPointerOperand(), Arg->
getType(),
1739 SI.getAlign(),
SI.getOrdering(),
SI.isVolatile(),
1740 SI.getSyncScopeID());
1741 return {
nullptr,
nullptr};
1746 return {
nullptr,
nullptr};
1751 return {
nullptr,
nullptr};
1759 return {
nullptr,
nullptr};
1760 IRB.SetInsertPoint(&AI);
1765 bool IsNonTemporal = AI.
getMetadata(LLVMContext::MD_nontemporal);
1767 auto [Rsrc,
Off] = getPtrParts(
Ptr);
1768 insertPreMemOpFence(Order, SSID);
1776 IRB.CreateIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap, Ty,
1778 Off, IRB.getInt32(0), IRB.getInt32(Aux)});
1779 copyMetadata(Call, &AI);
1781 Call->takeName(&AI);
1782 insertPostMemOpFence(Order, SSID);
1785 Res = IRB.CreateInsertValue(Res, Call, 0);
1788 Res = IRB.CreateInsertValue(Res, Succeeded, 1);
1790 SplitUsers.insert(&AI);
1792 return {
nullptr,
nullptr};
1799 return {
nullptr,
nullptr};
1800 IRB.SetInsertPoint(&
GEP);
1802 auto [Rsrc,
Off] = getPtrParts(
Ptr);
1804 bool IsNUW =
GEP.hasNoUnsignedWrap();
1805 bool IsNUSW =
GEP.hasNoUnsignedSignedWrap();
1810 if (
auto *VT = dyn_cast<VectorType>(
Off->getType()))
1811 FatPtrTy = VectorType::get(FatPtrTy, VT->getElementCount());
1812 GEP.mutateType(FatPtrTy);
1814 GEP.mutateType(
Ptr->getType());
1816 SplitUsers.insert(&
GEP);
1820 bool HasNonNegativeOff =
false;
1821 if (
auto *CI = dyn_cast<ConstantInt>(OffAccum)) {
1822 HasNonNegativeOff = !CI->isNegative();
1828 NewOff = IRB.CreateAdd(Off, OffAccum,
"",
1829 IsNUW || (IsNUSW && HasNonNegativeOff),
1832 copyMetadata(NewOff, &
GEP);
1834 SplitUsers.insert(&
GEP);
1835 return {Rsrc, NewOff};
1841 return {
nullptr,
nullptr};
1842 IRB.SetInsertPoint(&PI);
1847 auto [Rsrc,
Off] = getPtrParts(
Ptr);
1853 Res = IRB.CreateIntCast(Off, ResTy,
false,
1856 Value *RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.
getName() +
".rsrc");
1857 Value *Shl = IRB.CreateShl(
1860 "", Width >= FatPtrWidth, Width > FatPtrWidth);
1861 Value *OffCast = IRB.CreateIntCast(Off, ResTy,
false,
1863 Res = IRB.CreateOr(Shl, OffCast);
1866 copyMetadata(Res, &PI);
1868 SplitUsers.insert(&PI);
1870 return {
nullptr,
nullptr};
1875 return {
nullptr,
nullptr};
1876 IRB.SetInsertPoint(&IP);
1885 Type *RsrcTy =
RetTy->getElementType(0);
1887 Value *RsrcPart = IRB.CreateLShr(
1890 Value *RsrcInt = IRB.CreateIntCast(RsrcPart, RsrcIntTy,
false);
1891 Value *Rsrc = IRB.CreateIntToPtr(RsrcInt, RsrcTy, IP.
getName() +
".rsrc");
1893 IRB.CreateIntCast(
Int, OffTy,
false, IP.
getName() +
".off");
1895 copyMetadata(Rsrc, &IP);
1896 SplitUsers.insert(&IP);
1902 return {
nullptr,
nullptr};
1903 IRB.SetInsertPoint(&
I);
1906 if (
In->getType() ==
I.getType()) {
1907 auto [Rsrc,
Off] = getPtrParts(In);
1908 SplitUsers.insert(&
I);
1913 "buffer fat pointers (addrspace 7)");
1914 Type *OffTy = cast<StructType>(
I.getType())->getElementType(1);
1916 SplitUsers.insert(&
I);
1917 return {
In, ZeroOff};
1923 return {
nullptr,
nullptr};
1925 IRB.SetInsertPoint(&Cmp);
1928 assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
1929 "Pointer comparison is only equal or unequal");
1930 auto [LhsRsrc, LhsOff] = getPtrParts(Lhs);
1931 auto [RhsRsrc, RhsOff] = getPtrParts(Rhs);
1933 IRB.CreateICmp(Pred, LhsRsrc, RhsRsrc,
Cmp.getName() +
".rsrc");
1934 copyMetadata(RsrcCmp, &Cmp);
1935 Value *OffCmp = IRB.CreateICmp(Pred, LhsOff, RhsOff,
Cmp.getName() +
".off");
1936 copyMetadata(OffCmp, &Cmp);
1938 Value *Res =
nullptr;
1939 if (Pred == ICmpInst::ICMP_EQ)
1940 Res = IRB.CreateAnd(RsrcCmp, OffCmp);
1941 else if (Pred == ICmpInst::ICMP_NE)
1942 Res = IRB.CreateOr(RsrcCmp, OffCmp);
1943 copyMetadata(Res, &Cmp);
1945 SplitUsers.insert(&Cmp);
1946 Cmp.replaceAllUsesWith(Res);
1947 return {
nullptr,
nullptr};
1952 return {
nullptr,
nullptr};
1953 IRB.SetInsertPoint(&
I);
1954 auto [Rsrc,
Off] = getPtrParts(
I.getOperand(0));
1956 Value *RsrcRes = IRB.CreateFreeze(Rsrc,
I.getName() +
".rsrc");
1957 copyMetadata(RsrcRes, &
I);
1958 Value *OffRes = IRB.CreateFreeze(Off,
I.getName() +
".off");
1959 copyMetadata(OffRes, &
I);
1960 SplitUsers.insert(&
I);
1961 return {RsrcRes, OffRes};
1966 return {
nullptr,
nullptr};
1967 IRB.SetInsertPoint(&
I);
1968 Value *Vec =
I.getVectorOperand();
1970 auto [Rsrc,
Off] = getPtrParts(Vec);
1972 Value *RsrcRes = IRB.CreateExtractElement(Rsrc,
Idx,
I.getName() +
".rsrc");
1973 copyMetadata(RsrcRes, &
I);
1974 Value *OffRes = IRB.CreateExtractElement(Off,
Idx,
I.getName() +
".off");
1975 copyMetadata(OffRes, &
I);
1976 SplitUsers.insert(&
I);
1977 return {RsrcRes, OffRes};
1984 return {
nullptr,
nullptr};
1985 IRB.SetInsertPoint(&
I);
1986 Value *Vec =
I.getOperand(0);
1987 Value *Elem =
I.getOperand(1);
1989 auto [VecRsrc, VecOff] = getPtrParts(Vec);
1990 auto [ElemRsrc, ElemOff] = getPtrParts(Elem);
1993 IRB.CreateInsertElement(VecRsrc, ElemRsrc,
Idx,
I.getName() +
".rsrc");
1994 copyMetadata(RsrcRes, &
I);
1996 IRB.CreateInsertElement(VecOff, ElemOff,
Idx,
I.getName() +
".off");
1997 copyMetadata(OffRes, &
I);
1998 SplitUsers.insert(&
I);
1999 return {RsrcRes, OffRes};
2005 return {
nullptr,
nullptr};
2006 IRB.SetInsertPoint(&
I);
2008 Value *V1 =
I.getOperand(0);
2011 auto [V1Rsrc, V1Off] = getPtrParts(V1);
2012 auto [V2Rsrc, V2Off] = getPtrParts(V2);
2015 IRB.CreateShuffleVector(V1Rsrc, V2Rsrc, Mask,
I.getName() +
".rsrc");
2016 copyMetadata(RsrcRes, &
I);
2018 IRB.CreateShuffleVector(V1Off, V2Off, Mask,
I.getName() +
".off");
2019 copyMetadata(OffRes, &
I);
2020 SplitUsers.insert(&
I);
2021 return {RsrcRes, OffRes};
2026 return {
nullptr,
nullptr};
2027 IRB.SetInsertPoint(*
PHI.getInsertionPointAfterDef());
2033 Value *TmpRsrc = IRB.CreateExtractValue(&
PHI, 0,
PHI.getName() +
".rsrc");
2034 Value *TmpOff = IRB.CreateExtractValue(&
PHI, 1,
PHI.getName() +
".off");
2035 Conditionals.push_back(&
PHI);
2036 SplitUsers.insert(&
PHI);
2037 return {TmpRsrc, TmpOff};
2042 return {
nullptr,
nullptr};
2043 IRB.SetInsertPoint(&SI);
2046 Value *True =
SI.getTrueValue();
2047 Value *False =
SI.getFalseValue();
2048 auto [TrueRsrc, TrueOff] = getPtrParts(True);
2049 auto [FalseRsrc, FalseOff] = getPtrParts(False);
2052 IRB.CreateSelect(
Cond, TrueRsrc, FalseRsrc,
SI.getName() +
".rsrc", &SI);
2053 copyMetadata(RsrcRes, &SI);
2054 Conditionals.push_back(&SI);
2056 IRB.CreateSelect(
Cond, TrueOff, FalseOff,
SI.getName() +
".off", &SI);
2057 copyMetadata(OffRes, &SI);
2058 SplitUsers.insert(&SI);
2059 return {RsrcRes, OffRes};
2070 case Intrinsic::ptrmask:
2071 case Intrinsic::invariant_start:
2072 case Intrinsic::invariant_end:
2073 case Intrinsic::launder_invariant_group:
2074 case Intrinsic::strip_invariant_group:
2084 case Intrinsic::ptrmask: {
2087 return {
nullptr,
nullptr};
2089 IRB.SetInsertPoint(&
I);
2090 auto [Rsrc,
Off] = getPtrParts(
Ptr);
2091 if (
Mask->getType() !=
Off->getType())
2093 "pointer (data layout not set up correctly?)");
2094 Value *OffRes = IRB.CreateAnd(Off, Mask,
I.getName() +
".off");
2095 copyMetadata(OffRes, &
I);
2096 SplitUsers.insert(&
I);
2097 return {Rsrc, OffRes};
2101 case Intrinsic::invariant_start: {
2104 return {
nullptr,
nullptr};
2105 IRB.SetInsertPoint(&
I);
2106 auto [Rsrc,
Off] = getPtrParts(
Ptr);
2108 auto *NewRsrc = IRB.CreateIntrinsic(IID, {NewTy}, {
I.getOperand(0), Rsrc});
2109 copyMetadata(NewRsrc, &
I);
2111 SplitUsers.insert(&
I);
2112 I.replaceAllUsesWith(NewRsrc);
2113 return {
nullptr,
nullptr};
2115 case Intrinsic::invariant_end: {
2116 Value *RealPtr =
I.getArgOperand(2);
2118 return {
nullptr,
nullptr};
2119 IRB.SetInsertPoint(&
I);
2120 Value *RealRsrc = getPtrParts(RealPtr).first;
2121 Value *InvPtr =
I.getArgOperand(0);
2123 Value *NewRsrc = IRB.CreateIntrinsic(IID, {RealRsrc->
getType()},
2124 {InvPtr,
Size, RealRsrc});
2125 copyMetadata(NewRsrc, &
I);
2127 SplitUsers.insert(&
I);
2128 I.replaceAllUsesWith(NewRsrc);
2129 return {
nullptr,
nullptr};
2131 case Intrinsic::launder_invariant_group:
2132 case Intrinsic::strip_invariant_group: {
2135 return {
nullptr,
nullptr};
2136 IRB.SetInsertPoint(&
I);
2137 auto [Rsrc,
Off] = getPtrParts(
Ptr);
2138 Value *NewRsrc = IRB.CreateIntrinsic(IID, {Rsrc->
getType()}, {Rsrc});
2139 copyMetadata(NewRsrc, &
I);
2141 SplitUsers.insert(&
I);
2142 return {NewRsrc,
Off};
2145 return {
nullptr,
nullptr};
2148void SplitPtrStructs::processFunction(
Function &
F) {
2151 LLVM_DEBUG(
dbgs() <<
"Splitting pointer structs in function: " <<
F.getName()
2154 Originals.push_back(&
I);
2157 assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
2158 "Can't have a resource but no offset");
2160 RsrcParts[
I] = Rsrc;
2164 processConditionals();
2165 killAndReplaceSplitInstructions(Originals);
2171 Conditionals.clear();
2172 ConditionalTemps.clear();
2176class AMDGPULowerBufferFatPointers :
public ModulePass {
2196 BufferFatPtrToStructTypeMap *TypeMap) {
2197 bool HasFatPointers =
false;
2200 HasFatPointers |= (
I.getType() != TypeMap->remapType(
I.getType()));
2201 return HasFatPointers;
2205 BufferFatPtrToStructTypeMap *TypeMap) {
2206 Type *Ty =
F.getFunctionType();
2207 return Ty != TypeMap->remapType(Ty);
2224 while (!OldF->
empty()) {
2238 CloneMap[&NewArg] = &OldArg;
2239 NewArg.takeName(&OldArg);
2240 Type *OldArgTy = OldArg.getType(), *NewArgTy = NewArg.getType();
2242 NewArg.mutateType(OldArgTy);
2243 OldArg.replaceAllUsesWith(&NewArg);
2244 NewArg.mutateType(NewArgTy);
2248 if (OldArgTy != NewArgTy && !IsIntrinsic)
2268 CloneMap[&BB] = &BB;
2275 bool Changed =
false;
2282 BufferFatPtrToStructTypeMap StructTM(
DL);
2283 BufferFatPtrToIntTypeMap IntTM(
DL);
2287 "space (7) are not supported");
2288 Type *VT = GV.getValueType();
2289 if (VT != StructTM.remapType(VT))
2291 "(address space 7 pointers) are unsupported. Use "
2292 "buffer resource pointers (address space 8) instead.");
2301 if (isa<ConstantExpr>(
Op) || isa<ConstantAggregate>(
Op))
2307 while (!Worklist.
empty()) {
2309 if (!Visited.
insert(
C).second)
2314 if (isa<ConstantExpr>(
Op) || isa<ConstantAggregate>(
Op))
2325 StoreFatPtrsAsIntsVisitor MemOpsRewrite(&IntTM,
M.getContext());
2326 LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite(
DL,
2331 Changed |= MemOpsRewrite.processFunction(
F);
2332 if (InterfaceChange || BodyChanges) {
2333 NeedsRemap.
push_back(std::make_pair(&
F, InterfaceChange));
2334 Changed |= BufferContentsTypeRewrite.processFunction(
F);
2337 if (NeedsRemap.
empty())
2344 FatPtrConstMaterializer Materializer(&StructTM, CloneMap);
2347 for (
auto [
F, InterfaceChange] : NeedsRemap) {
2349 if (InterfaceChange)
2351 F, cast<FunctionType>(StructTM.remapType(
F->getFunctionType())),
2355 LowerInFuncs.remapFunction(*NewF);
2360 if (InterfaceChange) {
2361 F->replaceAllUsesWith(NewF);
2362 F->eraseFromParent();
2370 SplitPtrStructs Splitter(
M.getContext(), &TM);
2372 Splitter.processFunction(*
F);
2375 F->eraseFromParent();
2379 F->replaceAllUsesWith(*NewF);
2385bool AMDGPULowerBufferFatPointers::runOnModule(
Module &M) {
2391char AMDGPULowerBufferFatPointers::ID = 0;
2395void AMDGPULowerBufferFatPointers::getAnalysisUsage(
AnalysisUsage &AU)
const {
2399#define PASS_DESC "Lower buffer fat pointer operations to buffer resources"
2408 return new AMDGPULowerBufferFatPointers();
static Function * moveFunctionAdaptingType(Function *OldF, FunctionType *NewTy, ValueToValueMapTy &CloneMap)
Move the body of OldF into a new function, returning it.
static void makeCloneInPraceMap(Function *F, ValueToValueMapTy &CloneMap)
static bool isBufferFatPtrOrVector(Type *Ty)
static bool isSplitFatPtr(Type *Ty)
std::pair< Value *, Value * > PtrParts
static bool hasFatPointerInterface(const Function &F, BufferFatPtrToStructTypeMap *TypeMap)
static bool isRemovablePointerIntrinsic(Intrinsic::ID IID)
Returns true if this intrinsic needs to be removed when it is applied to ptr addrspace(7) values.
static bool containsBufferFatPointers(const Function &F, BufferFatPtrToStructTypeMap *TypeMap)
Returns true if there are values that have a buffer fat pointer in them, which means we'll need to pe...
static Value * rsrcPartRoot(Value *V)
Returns the instruction that defines the resource part of the value V.
static constexpr unsigned BufferOffsetWidth
static bool isBufferFatPtrConst(Constant *C)
static std::pair< Constant *, Constant * > splitLoweredFatBufferConst(Constant *C)
Return the ptr addrspace(8) and i32 (resource and offset parts) in a lowered buffer fat pointer const...
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
Atomic ordering constants.
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
AMD GCN specific subclass of TargetSubtarget.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
This file defines generic set operations that may be used on sets of different types,...
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Target-Independent Code Generator Pass Configuration Options pass.
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
An instruction that atomically checks whether a specified value is in a memory location,...
Value * getNewValOperand()
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
Value * getCompareOperand()
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
Value * getPointerOperand()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
AttributeSet getParamAttrs(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
AttributeSet removeAttributes(LLVMContext &C, const AttributeMask &AttrsToRemove) const
Remove the specified attributes from this set.
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
void removeFromParent()
Unlink 'this' from the containing function, but do not delete it.
void insertInto(Function *Parent, BasicBlock *InsertBefore=nullptr)
Insert unlinked basic block into a function.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
static Constant * get(StructType *T, ArrayRef< Constant * > V)
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Implements a dense probed hash-table based set.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns random concrete value if an operand is either a ...
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & front() const
iterator_range< arg_iterator > args()
bool IsNewDbgInfoFormat
Is this function using intrinsics to record the position of debugging information,...
AttributeList getAttributes() const
Return the attribute list for this Function.
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void updateAfterNameChange()
Update internal caches that depend on the function name (such as the intrinsic ID and libcall cache).
Type * getReturnType() const
Returns the type of the ret val.
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
static GEPNoWrapFlags noUnsignedWrap()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
void copyMetadata(const GlobalObject *Src, unsigned Offset)
Copy metadata from Src, adjusting offsets by Offset.
LinkageTypes getLinkage() const
void setDLLStorageClass(DLLStorageClassTypes C)
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
DLLStorageClassTypes getDLLStorageClass() const
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
Base class for instruction visitors.
RetTy visitFreezeInst(FreezeInst &I)
RetTy visitPtrToIntInst(PtrToIntInst &I)
RetTy visitExtractElementInst(ExtractElementInst &I)
RetTy visitIntrinsicInst(IntrinsicInst &I)
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I)
RetTy visitIntToPtrInst(IntToPtrInst &I)
RetTy visitPHINode(PHINode &I)
RetTy visitStoreInst(StoreInst &I)
RetTy visitInsertElementInst(InsertElementInst &I)
RetTy visitAtomicRMWInst(AtomicRMWInst &I)
RetTy visitAddrSpaceCastInst(AddrSpaceCastInst &I)
RetTy visitAllocaInst(AllocaInst &I)
RetTy visitICmpInst(ICmpInst &I)
RetTy visitSelectInst(SelectInst &I)
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
void visitInstruction(Instruction &I)
RetTy visitLoadInst(LoadInst &I)
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This class represents a cast from an integer to a pointer.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overridden by subclasses to process the module being operated on.
A Module instance is used to store all the information related to an LLVM module.
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
ArrayRef< value_type > getArrayRef() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
This instruction constructs a fixed permutation of two input vectors.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Value * getValueOperand()
Value * getPointerOperand()
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
MutableArrayRef< TypeSize > getMemberOffsets()
Class to represent struct types.
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
bool isLiteral() const
Return true if this type is uniqued by structural equivalence, false if it is a struct definition.
Type * getElementType(unsigned N) const
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Type * getArrayElementType() const
ArrayRef< Type * > subtypes() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
static IntegerType * getInt16Ty(LLVMContext &C)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
This is a class that can be implemented by clients to remap types when cloning constants and instruct...
virtual Type * remapType(Type *SrcTy)=0
The client should implement this method if they want to remap types while mapping values.
Context for (re-)mapping values (and metadata).
This is a class that can be implemented by clients to materialize Values on demand.
virtual Value * materialize(Value *V)=0
This method can be implemented to generate a mapped Value on demand.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
constexpr ScalarTy getFixedValue() const
self_iterator getIterator()
iterator insertAfter(iterator where, pointer New)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BUFFER_FAT_POINTER
Address space for 160-bit buffer fat pointers.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
AttributeMask typeIncompatible(Type *Ty, AttributeSet AS, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
std::optional< Function * > remangleIntrinsicFunction(Function *F)
bool match(Val *V, const Pattern &P)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use \ID as an operand.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
ModulePass * createAMDGPULowerBufferFatPointersPass()
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source)
Copy the metadata from the source instruction to the destination (the replacement for the source inst...
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
char & AMDGPULowerBufferFatPointersID
AtomicOrdering
Atomic ordering for LLVM's memory model.
S1Ty set_difference(const S1Ty &S1, const S2Ty &S2)
set_difference(A, B) - Return A - B
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.