#include "llvm/IR/IntrinsicsX86.h"

using namespace PatternMatch;

#define DEBUG_TYPE "lower-amx-type"
static bool isAMXCast(Instruction *II) {
  return match(II,
               m_Intrinsic<Intrinsic::x86_cast_vector_to_tile>(m_Value())) ||
         match(II, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(m_Value()));
}
static bool isAMXIntrinsic(Value *I) {
  auto *II = dyn_cast<IntrinsicInst>(I);
  // ...
  // True if the intrinsic produces or consumes an x86_amx value.
  if (V->getType()->isX86_AMXTy())
    return true;
  // ...
}
// In containsAMXCode(Function &F):
      if (I.getType()->isX86_AMXTy())
        return true;
// In createAllocaInstAtEntry(IRBuilder<> &Builder, BasicBlock *BB, Type *Ty):
  unsigned AllocaAS = DL.getAllocaAddrSpace();
  AllocaInst *AllocaRes =
      new AllocaInst(Ty, AllocaAS, "", F.getEntryBlock().begin());
// In getFirstNonAllocaInTheEntryBlock(Function &F):
    if (!isa<AllocaInst>(&I))
      return &I;
// ShapeCalculator members (the helpers below are defined in the class):
  std::map<Value *, Value *> Col2Row, Row2Col;

Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity) {
  if (Col2Row.count(V))
    return Col2Row[V];
  IRBuilder<> Builder(II);
  Value *RealRow = nullptr;
  if (isa<ConstantInt>(V))
    RealRow =
        Builder.getInt16((cast<ConstantInt>(V)->getSExtValue()) / Granularity);
  else if (isa<Instruction>(V)) {
    // ... (emit the equivalent udiv and place it right after V's definition)
    cast<Instruction>(RealRow)->moveAfter(cast<Instruction>(V));
  }
  // ...
  Col2Row[V] = RealRow;
  return RealRow;
}
Value *getColFromRow(Instruction *II, Value *V, unsigned Granularity) {
  if (Row2Col.count(V))
    return Row2Col[V];
  IRBuilder<> Builder(II);
  Value *RealCol = nullptr;
  if (isa<ConstantInt>(V))
    RealCol =
        Builder.getInt16((cast<ConstantInt>(V)->getSExtValue()) * Granularity);
  else if (isa<Instruction>(V)) {
    // ... (emit the equivalent mul and place it right after V's definition)
    cast<Instruction>(RealCol)->moveAfter(cast<Instruction>(V));
  }
  // ...
  Row2Col[V] = RealCol;
  return RealCol;
}
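// Editorial illustration (not from the original file): for a constant shape
// operand the two helpers above just rescale by the element granularity, e.g.
// with Granularity = 4 a Col value of 64 yields RealRow = 64 / 4 = 16, and a
// Row value of 16 yields RealCol = 16 * 4 = 64. For non-constant shapes an
// equivalent udiv/mul instruction is emitted next to the defining instruction
// instead.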
std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
  IRBuilder<> Builder(II);
  Value *Row = nullptr, *Col = nullptr;
  switch (II->getIntrinsicID()) {
  default:
    llvm_unreachable("Expect amx intrinsics");
  case Intrinsic::x86_t2rpntlvwz0_internal:
  case Intrinsic::x86_t2rpntlvwz0t1_internal:
  case Intrinsic::x86_t2rpntlvwz1_internal:
  case Intrinsic::x86_t2rpntlvwz1t1_internal:
  case Intrinsic::x86_tileloadd64_internal:
  case Intrinsic::x86_tileloaddt164_internal:
  case Intrinsic::x86_tilestored64_internal:
  case Intrinsic::x86_t2rpntlvwz0rs_internal:
  case Intrinsic::x86_t2rpntlvwz0rst1_internal:
  case Intrinsic::x86_t2rpntlvwz1rs_internal:
  case Intrinsic::x86_t2rpntlvwz1rst1_internal:
  case Intrinsic::x86_tileloaddrs64_internal:
  case Intrinsic::x86_tileloaddrst164_internal: {
    Row = II->getArgOperand(0);
    Col = II->getArgOperand(1);
    break;
  }
  // a * b + c: the shape depends on which operand is asked for.
  case Intrinsic::x86_tcmmimfp16ps_internal:
  case Intrinsic::x86_tcmmrlfp16ps_internal:
  case Intrinsic::x86_tdpbssd_internal:
  case Intrinsic::x86_tdpbsud_internal:
  case Intrinsic::x86_tdpbusd_internal:
  case Intrinsic::x86_tdpbuud_internal:
  case Intrinsic::x86_tdpbf16ps_internal:
  case Intrinsic::x86_tdpfp16ps_internal:
  case Intrinsic::x86_tmmultf32ps_internal:
  case Intrinsic::x86_tdpbf8ps_internal:
  case Intrinsic::x86_tdpbhf8ps_internal:
  case Intrinsic::x86_tdphbf8ps_internal:
  case Intrinsic::x86_tdphf8ps_internal: {
    switch (OpNo) {
    case 3:
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(1);
      break;
    case 4:
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(2);
      break;
    case 5:
      // ... (Row is derived from operand 2, scaled down by the packing factor)
      Col = II->getArgOperand(1);
      break;
    }
    break;
  }
  case Intrinsic::x86_ttransposed_internal:
  case Intrinsic::x86_tconjtfp16_internal: {
    assert((OpNo == 2) && "Illegal Operand Number.");
    // ... (Row and Col are swapped via getRowFromCol/getColFromRow)
    break;
  }
  case Intrinsic::x86_tcvtrowd2ps_internal:
  case Intrinsic::x86_tcvtrowps2bf16h_internal:
  case Intrinsic::x86_tcvtrowps2bf16l_internal:
  case Intrinsic::x86_tcvtrowps2phh_internal:
  case Intrinsic::x86_tcvtrowps2phl_internal:
  case Intrinsic::x86_tilemovrow_internal: {
    assert(OpNo == 2 && "Illegal Operand Number.");
    Row = II->getArgOperand(0);
    Col = II->getArgOperand(1);
    break;
  }
  case Intrinsic::x86_ttdpbf16ps_internal:
  case Intrinsic::x86_ttdpfp16ps_internal:
  case Intrinsic::x86_ttcmmimfp16ps_internal:
  case Intrinsic::x86_ttcmmrlfp16ps_internal:
  case Intrinsic::x86_tconjtcmmimfp16ps_internal:
  case Intrinsic::x86_ttmmultf32ps_internal: {
    switch (OpNo) {
    case 3:
      Row = II->getArgOperand(0);
      Col = II->getArgOperand(1);
      break;
    // ...
    case 5:
      // ...
      Col = II->getArgOperand(1);
      break;
    }
    break;
  }
  }

  return std::make_pair(Row, Col);
}
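// Editorial illustration (not from the original file): for a dot-product
// intrinsic such as
//   @llvm.x86.tdpbssd.internal(i16 %m, i16 %n, i16 %k,
//                              x86_amx %c, x86_amx %a, x86_amx %b)
// the shape returned depends on which operand is asked about: operand 3 (the
// accumulator %c) is %m x %n, operand 4 (%a) is %m x %k, and operand 5 (the
// packed %b) is (%k / 4) x %n, matching the operand-dependent cases in the
// switch above.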
// ShapeCalculator: find a shape for a PHI by following the chain of first
// users until an AMX intrinsic (or nothing) is found.
std::pair<Value *, Value *> getShape(PHINode *Phi) {
  Use &U = *(Phi->use_begin());
  unsigned OpNo = U.getOperandNo();
  User *V = U.getUser();
  while (V) {
    if (isAMXCast(dyn_cast<Instruction>(V))) {
      // ...
      Use &U = *(V->use_begin());
      OpNo = U.getOperandNo();
      V = U.getUser();
    } else if (isAMXIntrinsic(V)) {
      return getShape(cast<IntrinsicInst>(V), OpNo);
    } else if (isa<PHINode>(V)) {
      // ...
      Use &U = *(V->use_begin());
      V = U.getUser();
    } else {
      break;
    }
  }

  return std::make_pair(nullptr, nullptr);
}
class X86LowerAMXType {
  // ...
  std::map<Value *, Value *> Col2Row, Row2Col;
  // ...
};

// Rewrite "load <256 x i32> + bitcast to x86_amx" into a single tile load.
void X86LowerAMXType::combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast) {
  Value *Row = nullptr, *Col = nullptr;
  Use &U = *(Bitcast->use_begin());
  unsigned OpNo = U.getOperandNo();
  auto *II = cast<IntrinsicInst>(U.getUser());
  std::tie(Row, Col) = SC->getShape(II, OpNo);
  IRBuilder<> Builder(Bitcast);
  // Use the maximum column as stride.
  Value *Stride = Builder.getInt64(64);
  Value *I8Ptr = LD->getOperand(0);
  std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};

  Value *NewInst =
      Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
  Bitcast->replaceAllUsesWith(NewInst);
}
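// Sketch of the rewrite combineLoadBitcast performs (illustrative IR, assuming
// the shape %row/%col was recovered from the consuming AMX intrinsic and the
// fixed 64-byte stride used above):
//   %src = load <256 x i32>, ptr %addr, align 64
//   %2   = bitcast <256 x i32> %src to x86_amx
// -->
//   %2   = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
//                                                      ptr %addr, i64 64)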
// Rewrite "bitcast x86_amx to <256 x i32> + store" into a single tile store.
void X86LowerAMXType::combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST) {
  Value *Tile = Bitcast->getOperand(0);
  auto *II = cast<IntrinsicInst>(Tile);
  // Tile is the output of an AMX intrinsic; its first operand is the row and
  // its second operand is the column.
  Value *Row = II->getOperand(0);
  Value *Col = II->getOperand(1);
  IRBuilder<> Builder(ST);
  // Use the maximum column as stride; it must match the load stride.
  Value *Stride = Builder.getInt64(64);
  Value *I8Ptr = ST->getOperand(1);
  std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
  Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
  // If the bitcast has other users, let them read the value back from memory.
  // ...
  Value *Vec = Builder.CreateLoad(Bitcast->getType(), ST->getOperand(1));
  Bitcast->replaceAllUsesWith(Vec);
}
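// Sketch of the reverse rewrite in combineBitcastStore (illustrative IR):
//   %vec = bitcast x86_amx %tile to <256 x i32>
//   store <256 x i32> %vec, ptr %addr, align 64
// -->
//   call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr %addr,
//                                             i64 64, x86_amx %tile)
// Remaining users of %vec, if any, are redirected to a fresh load from %addr
// (the CreateLoad / replaceAllUsesWith pair above).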
// Lower a bitcast that could not be folded into an adjacent load/store by
// going through a stack slot.
bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
  IRBuilder<> Builder(Bitcast);
  AllocaInst *AllocaAddr;
  Value *I8Ptr, *Stride;
  auto *Src = Bitcast->getOperand(0);

  auto Prepare = [&](Type *MemTy) {
    // ... (create an entry-block alloca of MemTy as AllocaAddr)
    Stride = Builder.getInt64(64);
  };

  if (Bitcast->getType()->isX86_AMXTy()) {
    // Vector -> tile: spill the vector, reload it as a tile.
    Use &U = *(Bitcast->use_begin());
    unsigned OpNo = U.getOperandNo();
    auto *II = dyn_cast<IntrinsicInst>(U.getUser());
    if (!II)
      return false;
    Prepare(Bitcast->getOperand(0)->getType());
    Builder.CreateStore(Src, AllocaAddr);
    Value *Row = nullptr, *Col = nullptr;
    std::tie(Row, Col) = SC->getShape(II, OpNo);
    std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};
    Value *NewInst =
        Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
    Bitcast->replaceAllUsesWith(NewInst);
  } else {
    // Tile -> vector: store the tile, load it back as a vector.
    auto *II = dyn_cast<IntrinsicInst>(Src);
    if (!II)
      return false;
    Prepare(Src->getType());
    Value *Row = II->getOperand(0);
    Value *Col = II->getOperand(1);
    std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Src};
    Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
    Value *NewInst = Builder.CreateLoad(Bitcast->getType(), AllocaAddr);
    Bitcast->replaceAllUsesWith(NewInst);
  }
  return true;
}
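// Sketch of the transformBitcast fallback (illustrative IR): with no adjacent
// load or store to fold into, the value goes through an entry-block alloca.
// For the vector-to-tile direction:
//   %2 = bitcast <256 x i32> %src to x86_amx
// -->
//   %addr = alloca <256 x i32>, align 64
//   store <256 x i32> %src, ptr %addr, align 64
//   %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
//                                                    ptr %addr, i64 64)
// The tile-to-vector direction is symmetric: a tilestored64 into the alloca
// followed by a vector load.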
// Eliminate <256 x i32> <-> x86_amx bitcasts, folding them into tile
// loads/stores where possible.
bool X86LowerAMXType::visit() {
  // For each bitcast instruction in the function:
      auto *Bitcast = dyn_cast<BitCastInst>(&Inst);
      // ...
      if (Bitcast->getType()->isX86_AMXTy()) {
        // Vector -> tile.
        // ...
        if (transformBitcast(Bitcast))
          DeadInsts.push_back(Bitcast);
        // ...
        combineLoadBitcast(LD, Bitcast);
        // ...
      } else if (Src->getType()->isX86_AMXTy()) {
        // Tile -> vector: look for a store user.
        // ...
        ST = dyn_cast<StoreInst>(U.getUser());
        // ...
        if (transformBitcast(Bitcast))
          DeadInsts.push_back(Bitcast);
        // ...
        combineBitcastStore(Bitcast, ST);
        // ...
      }
  // ...
  bool C = !DeadInsts.empty();
  for (auto *Inst : DeadInsts)
    Inst->eraseFromParent();
  return C;
}
// In getAllocaPos(BasicBlock *BB): reserve a <256 x i32> stack slot in the
// entry block for "volatile" tile data.
  unsigned AllocaAS = DL.getAllocaAddrSpace();
  Type *V256I32Ty = VectorType::get(Builder.getInt32Ty(), 256, false);
  AllocaInst *AllocaRes =
      new AllocaInst(V256I32Ty, AllocaAS, "", F->getEntryBlock().begin());
// In createTileStore(Instruction *TileDef, Value *Ptr): store a tile def to
// its stack slot right after the defining instruction.
  auto *II = dyn_cast<IntrinsicInst>(TileDef);
  unsigned Idx = 0;
  // If the tile comes out of a multi-tile def, look through the extractvalue.
  if (auto *Extr = dyn_cast<ExtractValueInst>(TileDef)) {
    assert(Extr->hasIndices() && "Tile extract miss index!");
    Idx = Extr->getIndices()[0];
    II = cast<IntrinsicInst>(Extr->getOperand(0));
  }
  assert(II && "Not tile intrinsic!");
  // ... (take Row/Col from the defining intrinsic; the stride is 64 bytes)
  std::array<Value *, 5> Args = {Row, Col, Ptr, Stride, TileDef};

  Instruction *TileStore =
      Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
  return TileStore;
// In replaceWithTileLoad(Use &U, Value *Ptr, bool IsPHI): reload the tile from
// its stack slot right in front of the user.
  Value *V = U.get();
  assert(V->getType()->isX86_AMXTy() && "Not define tile!");

  // Get the tile shape from the defining intrinsic.
  // ...
  if (IsPHI) {
    Value *PhiOp = cast<PHINode>(V)->getIncomingValue(0);
    II = cast<IntrinsicInst>(PhiOp);
  } else if (auto *Extr = dyn_cast<ExtractValueInst>(V)) {
    assert(Extr->hasIndices() && "Tile extract miss index!");
    Idx = Extr->getIndices()[0];
    II = cast<IntrinsicInst>(Extr->getOperand(0));
  } else {
    II = cast<IntrinsicInst>(V);
  }
  // ...
  Instruction *UserI = cast<Instruction>(U.getUser());
  // ...
  std::array<Value *, 4> Args = {Row, Col, Ptr, Stride};

  Value *TileLoad =
      Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
  UserI->replaceUsesOfWith(V, TileLoad);
// In isIncomingOfPHI(Instruction *I):
  for (Use &U : I->uses()) {
    User *V = U.getUser();
    if (isa<PHINode>(V))
      return true;
  }
  return false;
class X86VolatileTileData {
  // ...
  bool volatileTileData();
  // ...
};

Value *X86VolatileTileData::updatePhiIncomings(
    BasicBlock *BB, SmallVector<Instruction *, 2> &Incomings) {
  Value *I8Ptr = getAllocaPos(BB);

  for (auto *I : Incomings) {
    User *Store = createTileStore(I, I8Ptr);

    // All its uses (except the phi) should load from the stored memory.
    for (Use &U : I->uses()) {
      User *V = U.getUser();
      if (isa<PHINode>(V) || V == Store)
        continue;
      replaceWithTileLoad(U, I8Ptr);
    }
  }
  return I8Ptr;
}

// In X86VolatileTileData::replacePhiDefWithLoad(Instruction *PHI, Value *StorePtr):
  for (Use &U : PHI->uses())
    replaceWithTileLoad(U, StorePtr, true);
  PHI->eraseFromParent();
void X86VolatileTileData::volatileTilePHI(PHINode *PHI) {
  BasicBlock *BB = PHI->getParent();
  SmallVector<Instruction *, 2> Incomings;

  for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
    Value *Op = PHI->getIncomingValue(I);
    Instruction *Inst = dyn_cast<Instruction>(Op);
    assert(Inst && "We shouldn't fold AMX instruction!");
    Incomings.push_back(Inst);
  }

  Value *StorePtr = updatePhiIncomings(BB, Incomings);
  replacePhiDefWithLoad(PHI, StorePtr);
}
void X86VolatileTileData::volatileTileNonPHI(Instruction *I) {
  BasicBlock *BB = I->getParent();
  Value *I8Ptr = getAllocaPos(BB);
  User *Store = createTileStore(I, I8Ptr);

  // All its uses should load from the stored memory.
  for (Use &U : I->uses()) {
    User *V = U.getUser();
    assert(!isa<PHINode>(V) && "PHI Nodes should be excluded!");
    if (V != Store)
      replaceWithTileLoad(U, I8Ptr);
  }
}
bool X86VolatileTileData::volatileTileData() {
  bool Changed = false;
  // Collect AMX-typed instructions per basic block, splitting PHIs from other
  // tile defs.
      if (!I.getType()->isX86_AMXTy())
        continue;
      if (isa<PHINode>(&I))
        PHIInsts.push_back(&I);
      // ...

  // First make the non-phi AMX defs "volatile", then the PHI-related ones.
      volatileTileNonPHI(I);
      // ...
      volatileTilePHI(dyn_cast<PHINode>(I));
      // ...
  return Changed;
}
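// Editorial illustration (not from the original file): "volatile" tile data
// means every AMX def is stored to a <256 x i32> stack slot right after it is
// produced and every use reloads it, roughly:
//   %td = call x86_amx @llvm.x86.tdpbssd.internal(...)
//   call void @llvm.x86.tilestored64.internal(..., ptr %slot, ..., x86_amx %td)
//   ...
//   %use = call x86_amx @llvm.x86.tileloadd64.internal(..., ptr %slot, ...)
// so no tile value stays live across blocks or PHIs, which keeps tile values
// out of the fast register allocator's way at O0.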
class X86LowerAMXCast {
  Function &Func;
  ShapeCalculator *SC;
  std::unique_ptr<DominatorTree> DT;

public:
  X86LowerAMXCast(Function &F, ShapeCalculator *ShapeC)
      : Func(F), SC(ShapeC), DT(nullptr) {}
  // ...
  bool transformAllAMXCast();
  // ...
};
// In DCEInstruction(Instruction *I, SmallSetVector<Instruction *, 16> &WorkList,
//                   const TargetLibraryInfo *TLI):
    // Null out all of the instruction's operands to see if any operand
    // becomes dead as we go.
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      Value *OpV = I->getOperand(i);
      I->setOperand(i, nullptr);
      // ...
      // If the operand is an instruction that became dead as we nulled it out,
      // queue it for a later DCE iteration.
      if (Instruction *OpI = dyn_cast<Instruction>(OpV)) {
        if (isInstructionTriviallyDead(OpI, TLI))
          WorkList.insert(OpI);
      }
    }
    I->eraseFromParent();
bool X86LowerAMXCast::optimizeAMXCastFromPhi(
    IntrinsicInst *CI, PHINode *PN,
    SmallSetVector<Instruction *, 16> &DeadInst) {
  // ...
  Type *SrcTy = Src->getType();
  // ...
  // Walk the PHI web reachable from PN; OldPhiNodes guards against cycles.
  while (!PhiWorklist.empty()) {
    // ...
    for (unsigned I = 0; I < OldPN->getNumOperands(); ++I) {
      Value *IncValue = OldPN->getIncomingValue(I);
      // Constant incoming values: only undef/zero are handled, by
      // materializing a tilezero in the incoming block.
      if (isa<Constant>(IncValue)) {
        auto *IncConst = dyn_cast<Constant>(IncValue);
        if (!isa<UndefValue>(IncValue) && !IncConst->isZeroValue())
          return false;
        Value *Row = nullptr, *Col = nullptr;
        std::tie(Row, Col) = SC->getShape(OldPN);
        // ...
        if (!Row || !Col || !isa<Constant>(Row) || !isa<Constant>(Col))
          return false;
        // Create tilezero at the end of the incoming block.
        auto *Block = OldPN->getIncomingBlock(I);
        // ...
        Instruction *NewInst = Builder.CreateIntrinsic(
            Intrinsic::x86_tilezero_internal, {}, {Row, Col});
        // ...
        NewInst = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
                                          {IncValue->getType()}, {NewInst});
        // ...
        OldPN->setIncomingValue(I, NewInst);
        // ...
      }

      if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
        if (OldPhiNodes.insert(PNode))
          PhiWorklist.push_back(PNode);
        continue;
      }
      Instruction *ACI = dyn_cast<Instruction>(IncValue);
      // ...
      // Verify it's a B->A cast.
      if (TyA != DestTy || TyB != SrcTy)
        return false;
      // ...
    }
  }

  // Check that each user of each old PHI node is something we can rewrite.
  for (auto *OldPN : OldPhiNodes) {
    // ...
      // Verify it's an A->B cast.
      if (TyA != DestTy || TyB != SrcTy)
        return false;
    // ...
    } else if (auto *PHI = dyn_cast<PHINode>(V)) {
      // Another old PHI node in the same web is fine.
      if (OldPhiNodes.count(PHI) == 0)
        return false;
    }
    // ...
  }

  // For each old PHI node, create a corresponding new PHI node of type A.
  for (auto *OldPN : OldPhiNodes) {
    Builder.SetInsertPoint(OldPN);
    PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
    NewPNodes[OldPN] = NewPN;
  }

  // Fill in the operands of the new PHI nodes.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
      Value *V = OldPN->getOperand(j);
      Value *NewV = nullptr;
      // ...
      else if (auto *PrevPN = dyn_cast<PHINode>(V))
        NewV = NewPNodes[PrevPN];
      // ...
      NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
    }
  }

  // Replace the users of the old PHI nodes (casts and other PHIs) with the
  // new PHI nodes and mark the dead casts for cleanup.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    // ...
      assert(TyA == DestTy && TyB == SrcTy);
      // ...
    } else if (auto *PHI = dyn_cast<PHINode>(V)) {
      // ...
    }
    // ...
  }
  return true;
}
static Value *getShapeFromAMXIntrinsic(Value *Inst, unsigned ShapeIdx,
                                       bool IsRow) {
  if (!isAMXIntrinsic(Inst))
    return nullptr;

  auto *II = cast<IntrinsicInst>(Inst);
  if (IsRow)
    return II->getOperand(0);

  assert(ShapeIdx < 2 && "Currently 2 shapes in 1 instruction at most!");
  return II->getOperand(ShapeIdx + 1);
}
// Rewrite "tile def -> cast.tile.to.vector -> store" into a direct tile store.
bool X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
  Value *Tile = Cast->getOperand(0);

  assert(Tile->getType()->isX86_AMXTy() && "Not Tile Operand!");

  // TODO: Specially handle the multi-use case.
  if (Tile->getNumUses() != 1)
    return false;

  // The shape is never fetched from the tile store; it comes only from the
  // tile def.
  IRBuilder<> Builder(ST);
  Value *Row = nullptr;
  Value *Col = nullptr;

  if (isAMXIntrinsic(Tile)) {
    auto *II = cast<IntrinsicInst>(Tile);
    // Tile is the output of an AMX intrinsic; the first operand is the row,
    // the second is the column.
    Row = II->getOperand(0);
    Col = II->getOperand(1);
  } else {
    // The tile may also come from extracting one tile out of a multi-tile
    // result, e.g. the struct returned by t2rpntlvwz0.
    auto *II = cast<ExtractValueInst>(Tile);
    assert(II && "We meet unhandled source in fetching tile value!");
    unsigned ShapeIdx = II->getIndices()[0];
    Value *Tiles = II->getOperand(0);
    Row = getShapeFromAMXIntrinsic(Tiles, ShapeIdx, true);
    Col = getShapeFromAMXIntrinsic(Tiles, ShapeIdx, false);
  }
  assert(Row && Col && "Shape got failed!");

  // Stride should be equal to Col (measured in bytes).
  Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
  Value *I8Ptr = Builder.CreateBitCast(ST->getOperand(1), Builder.getPtrTy());
  std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
  Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
  return true;
}
// Rewrite "load -> cast.vector.to.tile" into a direct tile load.
bool X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
  bool EraseLoad = true;
  Value *Row = nullptr, *Col = nullptr;
  Use &U = *(Cast->use_begin());
  unsigned OpNo = U.getOperandNo();
  auto *II = cast<IntrinsicInst>(U.getUser());
  // TODO: If it is a cast intrinsic or phi node, we can propagate the shape
  // information through the def-use chain.
  if (!isAMXIntrinsic(II))
    return false;
  std::tie(Row, Col) = SC->getShape(II, OpNo);
  IRBuilder<> Builder(LD);
  // Stride should be equal to Col (measured in bytes).
  Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
  Value *I8Ptr;

  // ... (lazily construct the DominatorTree when first needed)
  if (!DT->dominates(Row, LD) || !DT->dominates(Col, LD)) {
    // The shape values do not dominate the load: reroute the value through a
    // stack slot and keep the original load alive.
    // ...
    Builder.SetInsertPoint(&*std::next(LD->getIterator()));
    Builder.CreateStore(LD, AllocaAddr);

    Builder.SetInsertPoint(Cast);
    I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
    EraseLoad = false;
  } else {
    I8Ptr = Builder.CreateBitCast(LD->getOperand(0), Builder.getPtrTy());
  }
  std::array<Value *, 4> Args = {Row, Col, I8Ptr, Stride};

  Value *NewInst =
      Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
  Cast->replaceAllUsesWith(NewInst);

  return EraseLoad;
}
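// Sketch of combineLoadCast (illustrative IR): a vector load feeding
// @llvm.x86.cast.vector.to.tile becomes a direct tile load when %row/%col
// dominate the load; otherwise the value is routed through the stack slot set
// up above:
//   %src  = load <256 x i32>, ptr %addr, align 64
//   %tile = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %src)
// -->
//   %tile = call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col,
//                                                       ptr %addr, i64 %stride)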
bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
  bool Change = false;
  for (auto *Cast : Casts) {
    auto *II = cast<IntrinsicInst>(Cast);
    if (II->getIntrinsicID() == Intrinsic::x86_cast_tile_to_vector) {
      // Fold the cast into every store user, then drop the dead stores.
      // ...
      if (combineCastStore(cast<IntrinsicInst>(Cast), Store)) {
        // ...
      }
      // ...
      for (auto *Store : DeadStores)
        Store->eraseFromParent();
    } else { // x86_cast_vector_to_tile
      auto *Load = dyn_cast<LoadInst>(Cast->getOperand(0));
      if (!Load || !Load->hasOneUse())
        continue;
      if (combineLoadCast(cast<IntrinsicInst>(Cast), Load)) {
        // Null the operand so the load can be erased.
        Cast->setOperand(0, nullptr);
        Load->eraseFromParent();
      }
    }
  }
  return Change;
}
bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
  bool Change = false;
  // Collect the tile cast instructions in the function.
  // ...
      if (match(&I,
                m_Intrinsic<Intrinsic::x86_cast_vector_to_tile>(m_Value(Vec))))
        Vec2TileInsts.push_back(&I);
      else if (match(&I, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(
                             m_Value(Vec))))
        Tile2VecInsts.push_back(&I);
  // ...

  // Fold cast pairs: a cast whose user is the opposite cast simply forwards
  // its original operand to that user.
  auto Convert = [&](SmallVectorImpl<Instruction *> &Insts, Intrinsic::ID IID) {
    for (auto *Inst : Insts) {
      for (User *U : Inst->users()) {
        IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
        if (!II || II->getIntrinsicID() != IID)
          continue;
        // ...
        II->replaceAllUsesWith(Inst->getOperand(0));
        Change = true;
      }
    }
  };

  Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector);
  Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile);

  // Erase the casts that became dead; the live ones go to combineLdSt.
  auto EraseInst = [&](SmallVectorImpl<Instruction *> &Insts) {
    for (auto *Inst : Insts) {
      if (Inst->use_empty()) {
        Inst->eraseFromParent();
        Change = true;
      }
      // ...
    }
  };

  EraseInst(Vec2TileInsts);
  EraseInst(Tile2VecInsts);
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after combine "
                       "Vec2Tile and Tile2Vec:\n";
             Func.dump());
  Change |= combineLdSt(LiveCasts);
  EraseInst(LiveCasts);
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after combine "
                       "AMXCast and load/store:\n";
             Func.dump());

  // Handle the A->B->A cast with an intervening PHI node.
  // ...
        if (isa<PHINode>(I.getOperand(0)))
          PhiCastWorkList.push_back(&I);
  // ...
  for (auto *I : PhiCastWorkList) {
    // ...
    PHINode *PN = cast<PHINode>(I->getOperand(0));
    if (optimizeAMXCastFromPhi(cast<IntrinsicInst>(I), PN, DeadInst)) {
      DeadInst.insert(PN);
      Change = true;
    }
  }

  // New PHIs may have left old PHIs and AMX casts unused; DCE them.
  while (!DeadInst.empty()) {
    Instruction *I = DeadInst.pop_back_val();
    Change |= DCEInstruction(I, DeadInst, TLI);
  }
  LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after "
                       "optimizeAMXCastFromPhi:\n";
             Func.dump());
  return Change;
}
// Lower any AMX cast that survived combineAMXcast by going through memory.
bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) {
  IRBuilder<> Builder(AMXCast);
  AllocaInst *AllocaAddr;
  Value *I8Ptr, *Stride;
  auto *Src = AMXCast->getOperand(0);

  auto Prepare = [&](Type *MemTy) {
    // ... (create an entry-block alloca of MemTy as AllocaAddr)
    I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getPtrTy());
    Stride = Builder.getInt64(64);
  };

  if (AMXCast->getType()->isX86_AMXTy()) {
    // Vector -> tile: spill the vector, reload it as a tile.
    // ...
    Use &U = *(AMXCast->use_begin());
    unsigned OpNo = U.getOperandNo();
    auto *II = dyn_cast<IntrinsicInst>(U.getUser());
    if (!II)
      return false;
    Prepare(AMXCast->getOperand(0)->getType());
    Builder.CreateStore(Src, AllocaAddr);
    Value *Row = nullptr, *Col = nullptr;
    std::tie(Row, Col) = SC->getShape(II, OpNo);
    std::array<Value *, 4> Args = {
        Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty())};
    Value *NewInst =
        Builder.CreateIntrinsic(Intrinsic::x86_tileloadd64_internal, {}, Args);
    AMXCast->replaceAllUsesWith(NewInst);
    AMXCast->eraseFromParent();
  } else {
    // Tile -> vector: store the tile, load it back as a vector.
    auto *II = dyn_cast<IntrinsicInst>(Src);
    if (!II)
      return false;
    Prepare(Src->getType());
    Value *Row = II->getOperand(0);
    Value *Col = II->getOperand(1);
    std::array<Value *, 5> Args = {
        Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty()), Src};
    Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, {}, Args);
    Value *NewInst = Builder.CreateLoad(AMXCast->getType(), AllocaAddr);
    AMXCast->replaceAllUsesWith(NewInst);
    AMXCast->eraseFromParent();
  }

  return true;
}
bool X86LowerAMXCast::transformAllAMXCast() {
  bool Change = false;
  // Collect all remaining AMX cast intrinsics into WorkLists first.
  // ...
  for (auto *Inst : WorkLists) {
    Change |= transformAMXCast(cast<IntrinsicInst>(Inst));
  }
  return Change;
}
// In X86LowerAMXTypeLegacyPass::runOnFunction(Function &F):
  bool C = false;
  TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);

  ShapeCalculator SC(TM);
  X86LowerAMXCast LAC(F, &SC);
  C |= LAC.combineAMXcast(TLI);
  // There might be remaining AMX casts after combineAMXcast; they are handled
  // by the memory-based fallback.
  C |= LAC.transformAllAMXCast();

  X86LowerAMXType LAT(F, &SC);
  C |= LAT.visit();

  // Prepare for fast register allocation at O0.
  if (TM->getOptLevel() == CodeGenOptLevel::None) {
    // If the front end did not use O0 but the mid/back end does (e.g.
    // "clang -O2 -S -emit-llvm t.c" + "llc t.ll"), make sure the AMX data is
    // volatile; that is necessary for AMX fast register allocation.
    if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
      X86VolatileTileData VTD(F);
      C = VTD.volatileTileData() || C;
    }
  }

  return C;

static const char PassName[] = "Lower AMX type for load/store";
char X86LowerAMXTypeLegacyPass::ID = 0;
// ...

FunctionPass *llvm::createX86LowerAMXTypePass() {
  return new X86LowerAMXTypeLegacyPass();
}
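// Editorial note on ordering, based on the calls above: the legacy pass first
// folds AMX cast pairs (LAC.combineAMXcast), lowers any casts that remain
// through memory (LAC.transformAllAMXCast), runs the bitcast lowering in
// X86LowerAMXType, and finally, at -O0 for functions not marked optnone, makes
// tile data "volatile" via X86VolatileTileData so it can be handled by the
// fast register allocator.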