InstCombineLoadStoreAlloca.cpp
1//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for load, store and alloca.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/Loads.h"
20#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/LLVMContext.h"
27using namespace llvm;
28using namespace PatternMatch;
29
30#define DEBUG_TYPE "instcombine"
31
32STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34
36 "instcombine-max-copied-from-constant-users", cl::init(300),
37 cl::desc("Maximum users to visit in copy from constant transform"),
39
40/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
41/// pointer to an alloca. Ignore any reads of the pointer; return false if we
42/// see any stores or other unknown uses. If we see pointer arithmetic, keep
43/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
44/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
45/// the alloca, and if the source pointer is a pointer to a constant memory
46/// location, we can optimize this.
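///
/// For illustration, a minimal IR sketch of the qualifying pattern (here @g
/// stands in for some global in constant memory):
///   %a = alloca [16 x i8]
///   call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr @g, i64 16, i1 false)
///   %v = load i8, ptr %a
/// The memcpy is recorded in TheCopy and the loads are ignored.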
47static bool
48isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V,
49 MemTransferInst *&TheCopy,
50 SmallVectorImpl<Instruction *> &ToDelete) {
51 // We track lifetime intrinsics as we encounter them. If we decide to go
52 // ahead and replace the value with the memory location, this lets the caller
53 // quickly eliminate the markers.
54
55 using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
56 SmallVector<ValueAndIsOffset, 32> Worklist;
57 SmallPtrSet<ValueAndIsOffset, 32> Visited;
58 Worklist.emplace_back(V, false);
59 while (!Worklist.empty()) {
60 ValueAndIsOffset Elem = Worklist.pop_back_val();
61 if (!Visited.insert(Elem).second)
62 continue;
63 if (Visited.size() > MaxCopiedFromConstantUsers)
64 return false;
65
66 const auto [Value, IsOffset] = Elem;
67 for (auto &U : Value->uses()) {
68 auto *I = cast<Instruction>(U.getUser());
69
70 if (auto *LI = dyn_cast<LoadInst>(I)) {
71 // Ignore simple (non-volatile, non-atomic) loads; they are always ok.
72 if (!LI->isSimple()) return false;
73 continue;
74 }
75
76 if (isa<PHINode, SelectInst>(I)) {
77 // We set IsOffset=true, to forbid the memcpy from occurring after the
78 // phi: If one of the phi operands is not based on the alloca, we
79 // would incorrectly omit a write.
80 Worklist.emplace_back(I, true);
81 continue;
82 }
83 if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
84 // If uses of the bitcast are ok, we are ok.
85 Worklist.emplace_back(I, IsOffset);
86 continue;
87 }
88 if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
89 // If the GEP has all zero indices, it doesn't offset the pointer.
90 // Otherwise, it does.
91 Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
92 continue;
93 }
94
95 if (auto *Call = dyn_cast<CallBase>(I)) {
96 // If this is the function being called then we treat it like a load and
97 // ignore it.
98 if (Call->isCallee(&U))
99 continue;
100
101 unsigned DataOpNo = Call->getDataOperandNo(&U);
102 bool IsArgOperand = Call->isArgOperand(&U);
103
104 // Inalloca arguments are clobbered by the call.
105 if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
106 return false;
107
108 // If this call site doesn't modify the memory, then we know it is just
109 // a load (but one that potentially returns the value itself), so we can
110 // ignore it if we know that the value isn't captured.
111 bool NoCapture = Call->doesNotCapture(DataOpNo);
112 if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
113 (Call->onlyReadsMemory(DataOpNo) && NoCapture))
114 continue;
115 }
116
117 // Lifetime intrinsics can be handled by the caller.
118 if (I->isLifetimeStartOrEnd()) {
119 assert(I->use_empty() && "Lifetime markers have no result to use!");
120 ToDelete.push_back(I);
121 continue;
122 }
123
124 // If this isn't our memcpy/memmove, reject it as something we can't
125 // handle.
126 MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
127 if (!MI)
128 return false;
129
130 // If the transfer is volatile, reject it.
131 if (MI->isVolatile())
132 return false;
133
134 // If the transfer is using the alloca as a source of the transfer, then
135 // ignore it since it is a load (unless the transfer is volatile).
136 if (U.getOperandNo() == 1)
137 continue;
138
139 // If we already have seen a copy, reject the second one.
140 if (TheCopy) return false;
141
142 // If the pointer has been offset from the start of the alloca, we can't
143 // safely handle this.
144 if (IsOffset) return false;
145
146 // If the memintrinsic isn't using the alloca as the dest, reject it.
147 if (U.getOperandNo() != 0) return false;
148
149 // If the source of the memcpy/move is not constant, reject it.
150 if (isModSet(AA->getModRefInfoMask(MI->getSource())))
151 return false;
152
153 // Otherwise, the transform is safe. Remember the copy instruction.
154 TheCopy = MI;
155 }
156 }
157 return true;
158}
159
160/// isOnlyCopiedFromConstantMemory - Return the memcpy/memmove that initializes
161/// the specified alloca if the alloca is only modified by a copy from a constant
162/// memory location, and null otherwise. If we can prove this, we can replace any
163/// uses of the alloca with uses of the memory location directly.
164static MemTransferInst *
165isOnlyCopiedFromConstantMemory(AAResults *AA,
166 AllocaInst *AI,
167 SmallVectorImpl<Instruction *> &ToDelete) {
168 MemTransferInst *TheCopy = nullptr;
169 if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
170 return TheCopy;
171 return nullptr;
172}
173
174/// Returns true if V is dereferenceable for size of alloca.
175static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
176 const DataLayout &DL) {
177 if (AI->isArrayAllocation())
178 return false;
179 uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
180 if (!AllocaSize)
181 return false;
182 return isDereferenceableAndAlignedPointer(V, Align(1),
183 APInt(64, AllocaSize), DL);
184}
185
186static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
187 AllocaInst &AI, DominatorTree &DT) {
188 // Check for array size of 1 (scalar allocation).
189 if (!AI.isArrayAllocation()) {
190 // i32 1 is the canonical array size for scalar allocations.
191 if (AI.getArraySize()->getType()->isIntegerTy(32))
192 return nullptr;
193
194 // Canonicalize it.
195 return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
196 }
197
198 // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
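 // For example (illustrative IR):
 //   %a = alloca i32, i32 8   -->   %a = alloca [8 x i32]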
199 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
200 if (C->getValue().getActiveBits() <= 64) {
201 Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
202 AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
203 nullptr, AI.getName());
204 New->setAlignment(AI.getAlign());
205 New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
206
207 replaceAllDbgUsesWith(AI, *New, *New, DT);
208 return IC.replaceInstUsesWith(AI, New);
209 }
210 }
211
212 if (isa<UndefValue>(AI.getArraySize()))
213 return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
214
215 // Ensure that the alloca array size argument has type equal to the offset
216 // size of the alloca() pointer, which, in the typical case, is intptr_t,
217 // so that any casting is exposed early.
218 Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
219 if (AI.getArraySize()->getType() != PtrIdxTy) {
220 Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
221 return IC.replaceOperand(AI, 0, V);
222 }
223
224 return nullptr;
225}
226
227namespace {
228// If I and V are pointers in different address spaces, it is not allowed to
229// use replaceAllUsesWith since I and V have different types. A
230// non-target-specific transformation should not use addrspacecast on V since
231// the two address spaces may be disjoint depending on the target.
232//
233// This class chases down uses of the old pointer until reaching the load
234// instructions, then replaces the old pointer in the load instructions with
235// the new pointer. If during the chasing it sees bitcast or GEP, it will
236// create new bitcast or GEP with the new pointer and use them in the load
237// instruction.
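//
// Illustrative sketch (the address space numbers are arbitrary): if an alloca
// in addrspace(5) is only initialized by a memcpy from a constant global @g in
// addrspace(4), then
//   %p = getelementptr i8, ptr addrspace(5) %a, i64 4
//   %v = load i32, ptr addrspace(5) %p
// is rewritten to an equivalent GEP and load on @g in addrspace(4), so the
// alloca and the copy can be removed.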
238class PointerReplacer {
239public:
240 PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
241 : IC(IC), Root(Root), FromAS(SrcAS) {}
242
243 bool collectUsers();
244 void replacePointer(Value *V);
245
246private:
247 bool collectUsersRecursive(Instruction &I);
248 void replace(Instruction *I);
249 Value *getReplacement(Value *I);
250 bool isAvailable(Instruction *I) const {
251 return I == &Root || Worklist.contains(I);
252 }
253
254 bool isEqualOrValidAddrSpaceCast(const Instruction *I,
255 unsigned FromAS) const {
256 const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
257 if (!ASC)
258 return false;
259 unsigned ToAS = ASC->getDestAddressSpace();
260 return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
261 }
262
263 SmallPtrSet<Instruction *, 32> ValuesToRevisit;
264 SmallSetVector<Instruction *, 4> Worklist;
265 MapVector<Value *, Value *> WorkMap;
266 InstCombinerImpl &IC;
267 Instruction &Root;
268 unsigned FromAS;
269};
270} // end anonymous namespace
271
272bool PointerReplacer::collectUsers() {
273 if (!collectUsersRecursive(Root))
274 return false;
275
276 // Ensure that all outstanding (indirect) users of I
277 // are inserted into the Worklist. Return false
278 // otherwise.
279 return llvm::set_is_subset(ValuesToRevisit, Worklist);
280}
281
282bool PointerReplacer::collectUsersRecursive(Instruction &I) {
283 for (auto *U : I.users()) {
284 auto *Inst = cast<Instruction>(&*U);
285 if (auto *Load = dyn_cast<LoadInst>(Inst)) {
286 if (Load->isVolatile())
287 return false;
288 Worklist.insert(Load);
289 } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
290 // All incoming values must be instructions for replaceability.
291 if (any_of(PHI->incoming_values(),
292 [](Value *V) { return !isa<Instruction>(V); }))
293 return false;
294
295 // If at least one incoming value of the PHI is not in Worklist,
296 // store the PHI for revisiting and skip this iteration of the
297 // loop.
298 if (any_of(PHI->incoming_values(), [this](Value *V) {
299 return !isAvailable(cast<Instruction>(V));
300 })) {
301 ValuesToRevisit.insert(Inst);
302 continue;
303 }
304
305 Worklist.insert(PHI);
306 if (!collectUsersRecursive(*PHI))
307 return false;
308 } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
309 if (!isa<Instruction>(SI->getTrueValue()) ||
310 !isa<Instruction>(SI->getFalseValue()))
311 return false;
312
313 if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
314 !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
315 ValuesToRevisit.insert(Inst);
316 continue;
317 }
318 Worklist.insert(SI);
319 if (!collectUsersRecursive(*SI))
320 return false;
321 } else if (isa<GetElementPtrInst>(Inst)) {
322 Worklist.insert(Inst);
323 if (!collectUsersRecursive(*Inst))
324 return false;
325 } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
326 if (MI->isVolatile())
327 return false;
328 Worklist.insert(Inst);
329 } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
330 Worklist.insert(Inst);
331 if (!collectUsersRecursive(*Inst))
332 return false;
333 } else if (Inst->isLifetimeStartOrEnd()) {
334 continue;
335 } else {
336 // TODO: For arbitrary uses with address space mismatches, should we check
337 // if we can introduce a valid addrspacecast?
338 LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
339 return false;
340 }
341 }
342
343 return true;
344}
345
346Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
347
348void PointerReplacer::replace(Instruction *I) {
349 if (getReplacement(I))
350 return;
351
352 if (auto *LT = dyn_cast<LoadInst>(I)) {
353 auto *V = getReplacement(LT->getPointerOperand());
354 assert(V && "Operand not replaced");
355 auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
356 LT->getAlign(), LT->getOrdering(),
357 LT->getSyncScopeID());
358 NewI->takeName(LT);
359 copyMetadataForLoad(*NewI, *LT);
360
361 IC.InsertNewInstWith(NewI, LT->getIterator());
362 IC.replaceInstUsesWith(*LT, NewI);
363 WorkMap[LT] = NewI;
364 } else if (auto *PHI = dyn_cast<PHINode>(I)) {
365 Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
366 auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
367 PHI->getName(), PHI->getIterator());
368 for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
369 NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
370 PHI->getIncomingBlock(I));
371 WorkMap[PHI] = NewPHI;
372 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
373 auto *V = getReplacement(GEP->getPointerOperand());
374 assert(V && "Operand not replaced");
375 SmallVector<Value *, 8> Indices(GEP->indices());
376 auto *NewI =
377 GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
378 IC.InsertNewInstWith(NewI, GEP->getIterator());
379 NewI->takeName(GEP);
380 NewI->setNoWrapFlags(GEP->getNoWrapFlags());
381 WorkMap[GEP] = NewI;
382 } else if (auto *SI = dyn_cast<SelectInst>(I)) {
383 Value *TrueValue = SI->getTrueValue();
384 Value *FalseValue = SI->getFalseValue();
385 if (Value *Replacement = getReplacement(TrueValue))
386 TrueValue = Replacement;
387 if (Value *Replacement = getReplacement(FalseValue))
388 FalseValue = Replacement;
389 auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
390 SI->getName(), nullptr, SI);
391 IC.InsertNewInstWith(NewSI, SI->getIterator());
392 NewSI->takeName(SI);
393 WorkMap[SI] = NewSI;
394 } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
395 auto *DestV = MemCpy->getRawDest();
396 auto *SrcV = MemCpy->getRawSource();
397
398 if (auto *DestReplace = getReplacement(DestV))
399 DestV = DestReplace;
400 if (auto *SrcReplace = getReplacement(SrcV))
401 SrcV = SrcReplace;
402
403 IC.Builder.SetInsertPoint(MemCpy);
404 auto *NewI = IC.Builder.CreateMemTransferInst(
405 MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
406 MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
407 AAMDNodes AAMD = MemCpy->getAAMetadata();
408 if (AAMD)
409 NewI->setAAMetadata(AAMD);
410
411 IC.eraseInstFromFunction(*MemCpy);
412 WorkMap[MemCpy] = NewI;
413 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
414 auto *V = getReplacement(ASC->getPointerOperand());
415 assert(V && "Operand not replaced");
416 assert(isEqualOrValidAddrSpaceCast(
417 ASC, V->getType()->getPointerAddressSpace()) &&
418 "Invalid address space cast!");
419
420 if (V->getType()->getPointerAddressSpace() !=
421 ASC->getType()->getPointerAddressSpace()) {
422 auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
423 NewI->takeName(ASC);
424 IC.InsertNewInstWith(NewI, ASC->getIterator());
425 WorkMap[ASC] = NewI;
426 } else {
427 WorkMap[ASC] = V;
428 }
429
430 } else {
431 llvm_unreachable("should never reach here");
432 }
433}
434
435void PointerReplacer::replacePointer(Value *V) {
436#ifndef NDEBUG
437 auto *PT = cast<PointerType>(Root.getType());
438 auto *NT = cast<PointerType>(V->getType());
439 assert(PT != NT && "Invalid usage");
440#endif
441 WorkMap[&Root] = V;
442
443 for (Instruction *Workitem : Worklist)
444 replace(Workitem);
445}
446
447Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
448 if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
449 return I;
450
451 if (AI.getAllocatedType()->isSized()) {
452 // Move all alloca's of zero byte objects to the entry block and merge them
453 // together. Note that we only do this for alloca's, because malloc should
454 // allocate and return a unique pointer, even for a zero byte allocation.
455 if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinValue() == 0) {
456 // For a zero sized alloca there is no point in doing an array allocation.
457 // This is helpful if the array size is a complicated expression not used
458 // elsewhere.
459 if (AI.isArrayAllocation())
460 return replaceOperand(AI, 0,
461 ConstantInt::get(AI.getArraySize()->getType(), 1));
462
463 // Get the first instruction in the entry block.
464 BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
465 Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
466 if (FirstInst != &AI) {
467 // If the entry block doesn't start with a zero-size alloca then move
468 // this one to the start of the entry block. There is no problem with
469 // dominance as the array size was forced to a constant earlier already.
470 AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
471 if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
472 DL.getTypeAllocSize(EntryAI->getAllocatedType())
473 .getKnownMinValue() != 0) {
474 AI.moveBefore(FirstInst);
475 return &AI;
476 }
477
478 // Replace this zero-sized alloca with the one at the start of the entry
479 // block after ensuring that the address will be aligned enough for both
480 // types.
481 const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
482 EntryAI->setAlignment(MaxAlign);
483 return replaceInstUsesWith(AI, EntryAI);
484 }
485 }
486 }
487
488 // Check to see if this allocation is only modified by a memcpy/memmove from
489 // a memory location whose alignment is equal to or exceeds that of the
490 // allocation. If this is the case, we can change all users to use the
491 // constant memory location instead. This is commonly produced by the CFE by
492 // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
493 // is only subsequently read.
494 SmallVector<Instruction *, 4> ToDelete;
495 if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
496 Value *TheSrc = Copy->getSource();
497 Align AllocaAlign = AI.getAlign();
498 Align SourceAlign = getOrEnforceKnownAlignment(
499 TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
500 if (AllocaAlign <= SourceAlign &&
501 isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
502 !isa<Instruction>(TheSrc)) {
503 // FIXME: Can we sink instructions without violating dominance when TheSrc
504 // is an instruction instead of a constant or argument?
505 LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
506 LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
507 unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
508 if (AI.getAddressSpace() == SrcAddrSpace) {
509 for (Instruction *Delete : ToDelete)
510 eraseInstFromFunction(*Delete);
511
512 Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
513 eraseInstFromFunction(*Copy);
514 ++NumGlobalCopies;
515 return NewI;
516 }
517
518 PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
519 if (PtrReplacer.collectUsers()) {
520 for (Instruction *Delete : ToDelete)
521 eraseInstFromFunction(*Delete);
522
523 PtrReplacer.replacePointer(TheSrc);
524 ++NumGlobalCopies;
525 }
526 }
527 }
528
529 // At last, use the generic allocation site handler to aggressively remove
530 // unused allocas.
531 return visitAllocSite(AI);
532}
533
534// Are we allowed to form an atomic load or store of this type?
535static bool isSupportedAtomicType(Type *Ty) {
536 return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
537}
538
539/// Helper to combine a load to a new type.
540///
541/// This just does the work of combining a load to a new type. It handles
542/// metadata, etc., and returns the new instruction. The \c NewTy should be the
543/// loaded *value* type. This will convert it to a pointer, cast the operand to
544/// that pointer type, load it, etc.
545///
546/// Note that this will create all of the instructions with whatever insert
547/// point the \c InstCombinerImpl currently is using.
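///
/// A minimal usage sketch: combineLoadToOperationType() below calls
/// `IC.combineLoadToNewType(Load, DestTy)` to re-load the value as the
/// destination type of a no-op cast user, and then replaces the cast's uses
/// with the new load.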
548LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
549 const Twine &Suffix) {
550 assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
551 "can't fold an atomic load to requested type");
552
553 LoadInst *NewLoad =
554 Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
555 LI.isVolatile(), LI.getName() + Suffix);
556 NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
557 copyMetadataForLoad(*NewLoad, LI);
558 return NewLoad;
559}
560
561/// Combine a store to a new type.
562///
563/// Returns the newly created store instruction.
564static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
565 Value *V) {
566 assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
567 "can't fold an atomic store of requested type");
568
569 Value *Ptr = SI.getPointerOperand();
570 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
571 SI.getAllMetadata(MD);
572
573 StoreInst *NewStore =
574 IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
575 NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
576 for (const auto &MDPair : MD) {
577 unsigned ID = MDPair.first;
578 MDNode *N = MDPair.second;
579 // Note, essentially every kind of metadata should be preserved here! This
580 // routine is supposed to clone a store instruction changing *only its
581 // type*. The only metadata it makes sense to drop is metadata which is
582 // invalidated when the pointer type changes. This should essentially
583 // never be the case in LLVM, but we explicitly switch over only known
584 // metadata to be conservatively correct. If you are adding metadata to
585 // LLVM which pertains to stores, you almost certainly want to add it
586 // here.
587 switch (ID) {
588 case LLVMContext::MD_dbg:
589 case LLVMContext::MD_DIAssignID:
590 case LLVMContext::MD_tbaa:
591 case LLVMContext::MD_prof:
592 case LLVMContext::MD_fpmath:
593 case LLVMContext::MD_tbaa_struct:
594 case LLVMContext::MD_alias_scope:
595 case LLVMContext::MD_noalias:
596 case LLVMContext::MD_nontemporal:
597 case LLVMContext::MD_mem_parallel_loop_access:
598 case LLVMContext::MD_access_group:
599 // All of these directly apply.
600 NewStore->setMetadata(ID, N);
601 break;
602 case LLVMContext::MD_invariant_load:
603 case LLVMContext::MD_nonnull:
604 case LLVMContext::MD_noundef:
605 case LLVMContext::MD_range:
606 case LLVMContext::MD_align:
607 case LLVMContext::MD_dereferenceable:
608 case LLVMContext::MD_dereferenceable_or_null:
609 // These don't apply for stores.
610 break;
611 }
612 }
613
614 return NewStore;
615}
616
617/// Combine loads to match the type of their uses' value after looking
618/// through intervening bitcasts.
619///
620/// The core idea here is that if the result of a load is used in an operation,
621/// we should load the type most conducive to that operation. For example, when
622/// loading an integer and immediately bitcasting it to a float, we should
623/// instead directly load a float.
624///
625/// However, this routine must never change the width of a load or the number of
626/// loads as that would introduce a semantic change. This combine is expected to
627/// be a semantic no-op which just allows loads to more closely model the types
628/// of their consuming operations.
629///
630/// Currently, we also refuse to change the precise type used for an atomic load
631/// or a volatile load. This is debatable, and might be reasonable to change
632/// later. However, it is risky in case some backend or other part of LLVM is
633/// relying on the exact type loaded to select appropriate atomic operations.
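///
/// For example (an illustrative IR sketch), when the only user is a no-op
/// bitcast:
///   %v = load i32, ptr %p
///   %f = bitcast i32 %v to float
/// the load is rewritten to `load float, ptr %p` and the bitcast's uses are
/// replaced with the new load.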
634static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
635 LoadInst &Load) {
636 // FIXME: We could probably with some care handle both volatile and ordered
637 // atomic loads here but it isn't clear that this is important.
638 if (!Load.isUnordered())
639 return nullptr;
640
641 if (Load.use_empty())
642 return nullptr;
643
644 // swifterror values can't be bitcasted.
645 if (Load.getPointerOperand()->isSwiftError())
646 return nullptr;
647
648 // Fold away bit casts of the loaded value by loading the desired type.
649 // Note that we should not do this for pointer<->integer casts,
650 // because that would result in type punning.
651 if (Load.hasOneUse()) {
652 // Don't transform when the type is x86_amx; this keeps the pass that lowers
653 // the x86_amx type happy.
654 Type *LoadTy = Load.getType();
655 if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
656 assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
657 if (BC->getType()->isX86_AMXTy())
658 return nullptr;
659 }
660
661 if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
662 Type *DestTy = CastUser->getDestTy();
663 if (CastUser->isNoopCast(IC.getDataLayout()) &&
664 LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
665 (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
666 LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
667 CastUser->replaceAllUsesWith(NewLoad);
668 IC.eraseInstFromFunction(*CastUser);
669 return &Load;
670 }
671 }
672 }
673
674 // FIXME: We should also canonicalize loads of vectors when their elements are
675 // cast to other types.
676 return nullptr;
677}
678
679static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
680 // FIXME: We could probably with some care handle both volatile and atomic
681 // loads here but it isn't clear that this is important.
682 if (!LI.isSimple())
683 return nullptr;
684
685 Type *T = LI.getType();
686 if (!T->isAggregateType())
687 return nullptr;
688
689 StringRef Name = LI.getName();
690
691 if (auto *ST = dyn_cast<StructType>(T)) {
692 // If the struct only has one element, we unpack it.
693 auto NumElements = ST->getNumElements();
694 if (NumElements == 1) {
695 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
696 ".unpack");
697 NewLoad->setAAMetadata(LI.getAAMetadata());
698 return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
699 PoisonValue::get(T), NewLoad, 0, Name));
700 }
701
702 // We don't want to break loads with padding here as we'd lose
703 // the knowledge that padding exists for the rest of the pipeline.
704 const DataLayout &DL = IC.getDataLayout();
705 auto *SL = DL.getStructLayout(ST);
706
707 // Don't unpack structures with scalable vectors.
708 if (SL->getSizeInBits().isScalable())
709 return nullptr;
710
711 if (SL->hasPadding())
712 return nullptr;
713
714 const auto Align = LI.getAlign();
715 auto *Addr = LI.getPointerOperand();
716 auto *IdxType = Type::getInt32Ty(T->getContext());
717 auto *Zero = ConstantInt::get(IdxType, 0);
718
719 Value *V = PoisonValue::get(T);
720 for (unsigned i = 0; i < NumElements; i++) {
721 Value *Indices[2] = {
722 Zero,
723 ConstantInt::get(IdxType, i),
724 };
725 auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices),
726 Name + ".elt");
727 auto *L = IC.Builder.CreateAlignedLoad(
728 ST->getElementType(i), Ptr,
729 commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
730 // Propagate AA metadata. It'll still be valid on the narrowed load.
731 L->setAAMetadata(LI.getAAMetadata());
732 V = IC.Builder.CreateInsertValue(V, L, i);
733 }
734
735 V->setName(Name);
736 return IC.replaceInstUsesWith(LI, V);
737 }
738
739 if (auto *AT = dyn_cast<ArrayType>(T)) {
740 auto *ET = AT->getElementType();
741 auto NumElements = AT->getNumElements();
742 if (NumElements == 1) {
743 LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
744 NewLoad->setAAMetadata(LI.getAAMetadata());
745 return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
746 PoisonValue::get(T), NewLoad, 0, Name));
747 }
748
749 // Bail out if the array is too large. Ideally we would like to optimize
750 // arrays of arbitrary size but this has a terrible impact on compile time.
751 // The threshold here is chosen arbitrarily, maybe needs a little bit of
752 // tuning.
753 if (NumElements > IC.MaxArraySizeForCombine)
754 return nullptr;
755
756 const DataLayout &DL = IC.getDataLayout();
757 TypeSize EltSize = DL.getTypeAllocSize(ET);
758 const auto Align = LI.getAlign();
759
760 auto *Addr = LI.getPointerOperand();
761 auto *IdxType = Type::getInt64Ty(T->getContext());
762 auto *Zero = ConstantInt::get(IdxType, 0);
763
764 Value *V = PoisonValue::get(T);
765 TypeSize Offset = TypeSize::getZero();
766 for (uint64_t i = 0; i < NumElements; i++) {
767 Value *Indices[2] = {
768 Zero,
769 ConstantInt::get(IdxType, i),
770 };
771 auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
772 Name + ".elt");
773 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
774 auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
775 EltAlign, Name + ".unpack");
776 L->setAAMetadata(LI.getAAMetadata());
777 V = IC.Builder.CreateInsertValue(V, L, i);
778 Offset += EltSize;
779 }
780
781 V->setName(Name);
782 return IC.replaceInstUsesWith(LI, V);
783 }
784
785 return nullptr;
786}
787
788// If we can determine that all possible objects pointed to by the provided
789// pointer value are, not only dereferenceable, but also definitively less than
790// or equal to the provided maximum size, then return true. Otherwise, return
791// false. (Constant global values and allocas are objects whose size can be determined.)
792//
793// FIXME: This should probably live in ValueTracking (or similar).
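//
// For example (illustrative): given `@g = private constant [4 x i32] ...`,
// isObjectSizeLessThanOrEq(@g, 16, DL) returns true, while it returns false
// for an alloca whose array size is not a compile-time constant.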
794static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
795 const DataLayout &DL) {
796 SmallPtrSet<Value *, 4> Visited;
797 SmallVector<Value *, 4> Worklist(1, V);
798
799 do {
800 Value *P = Worklist.pop_back_val();
801 P = P->stripPointerCasts();
802
803 if (!Visited.insert(P).second)
804 continue;
805
806 if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
807 Worklist.push_back(SI->getTrueValue());
808 Worklist.push_back(SI->getFalseValue());
809 continue;
810 }
811
812 if (PHINode *PN = dyn_cast<PHINode>(P)) {
813 append_range(Worklist, PN->incoming_values());
814 continue;
815 }
816
817 if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
818 if (GA->isInterposable())
819 return false;
820 Worklist.push_back(GA->getAliasee());
821 continue;
822 }
823
824 // If we know how big this object is, and it is less than MaxSize, continue
825 // searching. Otherwise, return false.
826 if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
827 if (!AI->getAllocatedType()->isSized())
828 return false;
829
830 ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
831 if (!CS)
832 return false;
833
834 TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
835 if (TS.isScalable())
836 return false;
837 // Make sure that, even if the multiplication below would wrap as an
838 // uint64_t, we still do the right thing.
839 if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
840 .ugt(MaxSize))
841 return false;
842 continue;
843 }
844
845 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
846 if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
847 return false;
848
849 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
850 if (InitSize > MaxSize)
851 return false;
852 continue;
853 }
854
855 return false;
856 } while (!Worklist.empty());
857
858 return true;
859}
860
861// If we're indexing into an object of a known size, and the outer index is
862// not a constant, but having any value but zero would lead to undefined
863// behavior, replace it with zero.
864//
865// For example, if we have:
866// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
867// ...
868// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
869// ... = load i32* %arrayidx, align 4
870// Then we know that we can replace %x in the GEP with i64 0.
871//
872// FIXME: We could fold any GEP index to zero that would cause UB if it were
873// not zero. Currently, we only handle the first such index. We could also
874// search through non-zero constant indices if we kept track of the
875// offsets those indices implied.
876static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
877 GetElementPtrInst *GEPI, Instruction *MemI,
878 unsigned &Idx) {
879 if (GEPI->getNumOperands() < 2)
880 return false;
881
882 // Find the first non-zero index of a GEP. If all indices are zero, return
883 // one past the last index.
884 auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
885 unsigned I = 1;
886 for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
887 Value *V = GEPI->getOperand(I);
888 if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
889 if (CI->isZero())
890 continue;
891
892 break;
893 }
894
895 return I;
896 };
897
898 // Skip through initial 'zero' indices, and find the corresponding pointer
899 // type. See if the next index is not a constant.
900 Idx = FirstNZIdx(GEPI);
901 if (Idx == GEPI->getNumOperands())
902 return false;
903 if (isa<Constant>(GEPI->getOperand(Idx)))
904 return false;
905
906 SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
907 Type *SourceElementType = GEPI->getSourceElementType();
908 // Size information about scalable vectors is not available, so we cannot
909 // deduce whether indexing at n is undefined behaviour or not. Bail out.
910 if (SourceElementType->isScalableTy())
911 return false;
912
913 Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
914 if (!AllocTy || !AllocTy->isSized())
915 return false;
916 const DataLayout &DL = IC.getDataLayout();
917 uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
918
919 // If there are more indices after the one we might replace with a zero, make
920 // sure they're all non-negative. If any of them are negative, the overall
921 // address being computed might be before the base address determined by the
922 // first non-zero index.
923 auto IsAllNonNegative = [&]() {
924 for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
925 KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
926 if (Known.isNonNegative())
927 continue;
928 return false;
929 }
930
931 return true;
932 };
933
934 // FIXME: If the GEP is not inbounds, and there are extra indices after the
935 // one we'll replace, those could cause the address computation to wrap
936 // (rendering the IsAllNonNegative() check below insufficient). We can do
937 // better, ignoring zero indices (and other indices we can prove small
938 // enough not to wrap).
939 if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
940 return false;
941
942 // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
943 // also known to be dereferenceable.
944 return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
945 IsAllNonNegative();
946}
947
948// If we're indexing into an object with a variable index for the memory
949// access, but the object has only one element, we can assume that the index
950// will always be zero. If we replace the GEP, return it.
951static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
952 Instruction &MemI) {
953 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
954 unsigned Idx;
955 if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
956 Instruction *NewGEPI = GEPI->clone();
957 NewGEPI->setOperand(Idx,
958 ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
959 IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
960 return NewGEPI;
961 }
962 }
963
964 return nullptr;
965}
966
967static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
968 if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
969 return false;
970
971 auto *Ptr = SI.getPointerOperand();
972 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
973 Ptr = GEPI->getOperand(0);
974 return (isa<ConstantPointerNull>(Ptr) &&
975 !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
976}
977
978static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
979 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
980 const Value *GEPI0 = GEPI->getOperand(0);
981 if (isa<ConstantPointerNull>(GEPI0) &&
982 !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
983 return true;
984 }
985 if (isa<UndefValue>(Op) ||
986 (isa<ConstantPointerNull>(Op) &&
987 !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
988 return true;
989 return false;
990}
991
992Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
993 Value *Op = LI.getOperand(0);
994 if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
995 return replaceInstUsesWith(LI, Res);
996
997 // Try to canonicalize the loaded type.
998 if (Instruction *Res = combineLoadToOperationType(*this, LI))
999 return Res;
1000
1001 // Replace GEP indices if possible.
1002 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
1003 return replaceOperand(LI, 0, NewGEPI);
1004
1005 if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1006 return Res;
1007
1008 // Do really simple store-to-load forwarding and load CSE, to catch cases
1009 // where there are several consecutive memory accesses to the same location,
1010 // separated by a few arithmetic operations.
1011 bool IsLoadCSE = false;
1012 BatchAAResults BatchAA(*AA);
1013 if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
1014 if (IsLoadCSE)
1015 combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1016
1017 return replaceInstUsesWith(
1018 LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1019 LI.getName() + ".cast"));
1020 }
1021
1022 // None of the following transforms are legal for volatile/ordered atomic
1023 // loads. Most of them do apply for unordered atomics.
1024 if (!LI.isUnordered()) return nullptr;
1025
1026 // load(gep null, ...) -> unreachable
1027 // load null/undef -> unreachable
1028 // TODO: Consider a target hook for valid address spaces for these transforms.
1029 if (canSimplifyNullLoadOrGEP(LI, Op)) {
1030 CreateNonTerminatorUnreachable(&LI);
1031 return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
1032 }
1033
1034 if (Op->hasOneUse()) {
1035 // Change select and PHI nodes to select values instead of addresses: this
1036 // helps alias analysis out a lot, allows many other simplifications, and
1037 // exposes redundancy in the code.
1038 //
1039 // Note that we cannot do the transformation unless we know that the
1040 // introduced loads cannot trap! Something like this is valid as long as
1041 // the condition is always false: load (select bool %C, int* null, int* %G),
1042 // but it would not be valid if we transformed it to load from null
1043 // unconditionally.
1044 //
1045 if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1046 // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1047 Align Alignment = LI.getAlign();
1048 if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
1049 Alignment, DL, SI) &&
1050 isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
1051 Alignment, DL, SI)) {
1052 LoadInst *V1 =
1053 Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1054 SI->getOperand(1)->getName() + ".val");
1055 LoadInst *V2 =
1056 Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1057 SI->getOperand(2)->getName() + ".val");
1058 assert(LI.isUnordered() && "implied by above");
1059 V1->setAlignment(Alignment);
1060 V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1061 V2->setAlignment(Alignment);
1062 V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1063 // It is safe to copy any metadata that does not trigger UB. Copy any
1064 // poison-generating metadata.
1065 V1->copyMetadata(LI, Metadata::PoisonGeneratingIDs);
1066 V2->copyMetadata(LI, Metadata::PoisonGeneratingIDs);
1067 return SelectInst::Create(SI->getCondition(), V1, V2);
1068 }
1069
1070 // load (select (cond, null, P)) -> load P
1071 if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1072 !NullPointerIsDefined(SI->getFunction(),
1073 LI.getPointerAddressSpace()))
1074 return replaceOperand(LI, 0, SI->getOperand(2));
1075
1076 // load (select (cond, P, null)) -> load P
1077 if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1078 !NullPointerIsDefined(SI->getFunction(),
1079 LI.getPointerAddressSpace()))
1080 return replaceOperand(LI, 0, SI->getOperand(1));
1081 }
1082 }
1083 return nullptr;
1084}
1085
1086/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1087///
1088/// \returns underlying value that was "cast", or nullptr otherwise.
1089///
1090/// For example, if we have:
1091///
1092/// %E0 = extractelement <2 x double> %U, i32 0
1093/// %V0 = insertvalue [2 x double] undef, double %E0, 0
1094/// %E1 = extractelement <2 x double> %U, i32 1
1095/// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1096///
1097/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1098/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1099/// Note that %U may contain non-undef values where %V1 has undef.
1100static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
1101 Value *U = nullptr;
1102 while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1103 auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1104 if (!E)
1105 return nullptr;
1106 auto *W = E->getVectorOperand();
1107 if (!U)
1108 U = W;
1109 else if (U != W)
1110 return nullptr;
1111 auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1112 if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1113 return nullptr;
1114 V = IV->getAggregateOperand();
1115 }
1116 if (!match(V, m_Undef()) || !U)
1117 return nullptr;
1118
1119 auto *UT = cast<VectorType>(U->getType());
1120 auto *VT = V->getType();
1121 // Check that types UT and VT are bitwise isomorphic.
1122 const auto &DL = IC.getDataLayout();
1123 if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1124 return nullptr;
1125 }
1126 if (auto *AT = dyn_cast<ArrayType>(VT)) {
1127 if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1128 return nullptr;
1129 } else {
1130 auto *ST = cast<StructType>(VT);
1131 if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
1132 return nullptr;
1133 for (const auto *EltT : ST->elements()) {
1134 if (EltT != UT->getElementType())
1135 return nullptr;
1136 }
1137 }
1138 return U;
1139}
1140
1141/// Combine stores to match the type of value being stored.
1142///
1143/// The core idea here is that the memory does not have any intrinsic type and
1144/// where we can we should match the type of a store to the type of value being
1145/// stored.
1146///
1147/// However, this routine must never change the width of a store or the number of
1148/// stores as that would introduce a semantic change. This combine is expected to
1149/// be a semantic no-op which just allows stores to more closely model the types
1150/// of their incoming values.
1151///
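/// For example (illustrative IR):
///   %i = bitcast float %f to i32
///   store i32 %i, ptr %p
/// is rewritten to store the original value:
///   store float %f, ptr %p
///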
1152/// Currently, we also refuse to change the precise type used for an atomic or
1153/// volatile store. This is debatable, and might be reasonable to change later.
1154/// However, it is risky in case some backend or other part of LLVM is relying
1155/// on the exact type stored to select appropriate atomic operations.
1156///
1157/// \returns true if the store was successfully combined away. This indicates
1158/// the caller must erase the store instruction. We have to let the caller erase
1159/// the store instruction as otherwise there is no way to signal whether it was
1160/// combined or not: IC.EraseInstFromFunction returns a null pointer.
1161static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
1162 // FIXME: We could probably with some care handle both volatile and ordered
1163 // atomic stores here but it isn't clear that this is important.
1164 if (!SI.isUnordered())
1165 return false;
1166
1167 // swifterror values can't be bitcasted.
1168 if (SI.getPointerOperand()->isSwiftError())
1169 return false;
1170
1171 Value *V = SI.getValueOperand();
1172
1173 // Fold away bit casts of the stored value by storing the original type.
1174 if (auto *BC = dyn_cast<BitCastInst>(V)) {
1175 assert(!BC->getType()->isX86_AMXTy() &&
1176 "store to x86_amx* should not happen!");
1177 V = BC->getOperand(0);
1178 // Don't transform when the type is x86_amx; this keeps the pass that lowers
1179 // the x86_amx type happy.
1180 if (V->getType()->isX86_AMXTy())
1181 return false;
1182 if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1183 combineStoreToNewValue(IC, SI, V);
1184 return true;
1185 }
1186 }
1187
1188 if (Value *U = likeBitCastFromVector(IC, V))
1189 if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1190 combineStoreToNewValue(IC, SI, U);
1191 return true;
1192 }
1193
1194 // FIXME: We should also canonicalize stores of vectors when their elements
1195 // are cast to other types.
1196 return false;
1197}
1198
1199static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
1200 // FIXME: We could probably with some care handle both volatile and atomic
1201 // stores here but it isn't clear that this is important.
1202 if (!SI.isSimple())
1203 return false;
1204
1205 Value *V = SI.getValueOperand();
1206 Type *T = V->getType();
1207
1208 if (!T->isAggregateType())
1209 return false;
1210
1211 if (auto *ST = dyn_cast<StructType>(T)) {
1212 // If the struct only has one element, we unpack it.
1213 unsigned Count = ST->getNumElements();
1214 if (Count == 1) {
1215 V = IC.Builder.CreateExtractValue(V, 0);
1216 combineStoreToNewValue(IC, SI, V);
1217 return true;
1218 }
1219
1220 // We don't want to break stores with padding here as we'd lose
1221 // the knowledge that padding exists for the rest of the pipeline.
1222 const DataLayout &DL = IC.getDataLayout();
1223 auto *SL = DL.getStructLayout(ST);
1224
1225 // Don't unpack structures with scalable vectors.
1226 if (SL->getSizeInBits().isScalable())
1227 return false;
1228
1229 if (SL->hasPadding())
1230 return false;
1231
1232 const auto Align = SI.getAlign();
1233
1234 SmallString<16> EltName = V->getName();
1235 EltName += ".elt";
1236 auto *Addr = SI.getPointerOperand();
1237 SmallString<16> AddrName = Addr->getName();
1238 AddrName += ".repack";
1239
1240 auto *IdxType = Type::getInt32Ty(ST->getContext());
1241 auto *Zero = ConstantInt::get(IdxType, 0);
1242 for (unsigned i = 0; i < Count; i++) {
1243 Value *Indices[2] = {
1244 Zero,
1245 ConstantInt::get(IdxType, i),
1246 };
1247 auto *Ptr =
1248 IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
1249 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1250 auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
1251 llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1252 NS->setAAMetadata(SI.getAAMetadata());
1253 }
1254
1255 return true;
1256 }
1257
1258 if (auto *AT = dyn_cast<ArrayType>(T)) {
1259 // If the array only has one element, we unpack it.
1260 auto NumElements = AT->getNumElements();
1261 if (NumElements == 1) {
1262 V = IC.Builder.CreateExtractValue(V, 0);
1263 combineStoreToNewValue(IC, SI, V);
1264 return true;
1265 }
1266
1267 // Bail out if the array is too large. Ideally we would like to optimize
1268 // arrays of arbitrary size but this has a terrible impact on compile time.
1269 // The threshold here is chosen arbitrarily, maybe needs a little bit of
1270 // tuning.
1271 if (NumElements > IC.MaxArraySizeForCombine)
1272 return false;
1273
1274 const DataLayout &DL = IC.getDataLayout();
1275 TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
1276 const auto Align = SI.getAlign();
1277
1278 SmallString<16> EltName = V->getName();
1279 EltName += ".elt";
1280 auto *Addr = SI.getPointerOperand();
1281 SmallString<16> AddrName = Addr->getName();
1282 AddrName += ".repack";
1283
1284 auto *IdxType = Type::getInt64Ty(T->getContext());
1285 auto *Zero = ConstantInt::get(IdxType, 0);
1286
1287 TypeSize Offset = TypeSize::getZero();
1288 for (uint64_t i = 0; i < NumElements; i++) {
1289 Value *Indices[2] = {
1290 Zero,
1291 ConstantInt::get(IdxType, i),
1292 };
1293 auto *Ptr =
1294 IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
1295 auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1296 auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
1297 Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1298 NS->setAAMetadata(SI.getAAMetadata());
1299 Offset += EltSize;
1300 }
1301
1302 return true;
1303 }
1304
1305 return false;
1306}
1307
1308/// equivalentAddressValues - Test if A and B will obviously have the same
1309/// value. This includes recognizing that %t0 and %t1 will have the same
1310/// value in code like this:
1311/// %t0 = getelementptr \@a, 0, 3
1312/// store i32 0, i32* %t0
1313/// %t1 = getelementptr \@a, 0, 3
1314/// %t2 = load i32* %t1
1315///
1316static bool equivalentAddressValues(Value *A, Value *B) {
1317 // Test if the values are trivially equivalent.
1318 if (A == B) return true;
1319
1320 // Test if the values come from identical arithmetic instructions.
1321 // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1322 // it's only used to compare two uses within the same basic block, which
1323 // means that they'll always either have the same value or one of them
1324 // will have an undefined value.
1325 if (isa<BinaryOperator>(A) ||
1326 isa<CastInst>(A) ||
1327 isa<PHINode>(A) ||
1328 isa<GetElementPtrInst>(A))
1329 if (Instruction *BI = dyn_cast<Instruction>(B))
1330 if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1331 return true;
1332
1333 // Otherwise they may not be equivalent.
1334 return false;
1335}
1336
1337Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
1338 Value *Val = SI.getOperand(0);
1339 Value *Ptr = SI.getOperand(1);
1340
1341 // Try to canonicalize the stored type.
1342 if (combineStoreToValueType(*this, SI))
1343 return eraseInstFromFunction(SI);
1344
1345 // Try to canonicalize the stored type.
1346 if (unpackStoreToAggregate(*this, SI))
1347 return eraseInstFromFunction(SI);
1348
1349 // Replace GEP indices if possible.
1350 if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
1351 return replaceOperand(SI, 1, NewGEPI);
1352
1353 // Don't hack volatile/ordered stores.
1354 // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1355 if (!SI.isUnordered()) return nullptr;
1356
1357 // If the RHS is an alloca with a single use, zapify the store, making the
1358 // alloca dead.
1359 if (Ptr->hasOneUse()) {
1360 if (isa<AllocaInst>(Ptr))
1361 return eraseInstFromFunction(SI);
1362 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1363 if (isa<AllocaInst>(GEP->getOperand(0))) {
1364 if (GEP->getOperand(0)->hasOneUse())
1365 return eraseInstFromFunction(SI);
1366 }
1367 }
1368 }
1369
1370 // If we have a store to a location which is known constant, we can conclude
1371 // that the store must be storing the constant value (else the memory
1372 // wouldn't be constant), and this must be a noop.
1373 if (!isModSet(AA->getModRefInfoMask(Ptr)))
1374 return eraseInstFromFunction(SI);
1375
1376 // Do really simple DSE, to catch cases where there are several consecutive
1377 // stores to the same location, separated by a few arithmetic operations. This
1378 // situation often occurs with bitfield accesses.
1379 BasicBlock::iterator BBI(SI);
1380 for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1381 --ScanInsts) {
1382 --BBI;
1383 // Don't count debug info directives, lest they affect codegen,
1384 // and we skip pointer-to-pointer bitcasts, which are NOPs.
1385 if (BBI->isDebugOrPseudoInst()) {
1386 ScanInsts++;
1387 continue;
1388 }
1389
1390 if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1391 // Prev store isn't volatile, and stores to the same location?
1392 if (PrevSI->isUnordered() &&
1393 equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
1394 PrevSI->getValueOperand()->getType() ==
1395 SI.getValueOperand()->getType()) {
1396 ++NumDeadStore;
1397 // Manually add back the original store to the worklist now, so it will
1398 // be processed after the operands of the removed store, as this may
1399 // expose additional DSE opportunities.
1400 Worklist.push(&SI);
1401 eraseInstFromFunction(*PrevSI);
1402 return nullptr;
1403 }
1404 break;
1405 }
1406
1407 // If this is a load, we have to stop. However, if the loaded value is from
1408 // the pointer we're loading and is producing the pointer we're storing,
1409 // then *this* store is dead (X = load P; store X -> P).
1410 if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1411 if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1412 assert(SI.isUnordered() && "can't eliminate ordering operation");
1413 return eraseInstFromFunction(SI);
1414 }
1415
1416 // Otherwise, this is a load from some other location. Stores before it
1417 // may not be dead.
1418 break;
1419 }
1420
1421 // Don't skip over loads, throws or things that can modify memory.
1422 if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1423 break;
1424 }
1425
1426 // store X, null -> turns into 'unreachable' in SimplifyCFG
1427 // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1428 if (canSimplifyNullStoreOrGEP(SI)) {
1429 if (!isa<PoisonValue>(Val))
1430 return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
1431 return nullptr; // Do not modify these!
1432 }
1433
1434 // This is a non-terminator unreachable marker. Don't remove it.
1435 if (isa<UndefValue>(Ptr)) {
1436 // Remove guaranteed-to-transfer instructions before the marker.
1437 if (removeInstructionsBeforeUnreachable(SI))
1438 return &SI;
1439
1440 // Remove all instructions after the marker and handle dead blocks this
1441 // implies.
1442 SmallVector<BasicBlock *> Worklist;
1443 handleUnreachableFrom(SI.getNextNode(), Worklist);
1444 handlePotentiallyDeadBlocks(Worklist);
1445 return nullptr;
1446 }
1447
1448 // store undef, Ptr -> noop
1449 // FIXME: This is technically incorrect because it might overwrite a poison
1450 // value. Change to PoisonValue once #52930 is resolved.
1451 if (isa<UndefValue>(Val))
1452 return eraseInstFromFunction(SI);
1453
1454 return nullptr;
1455}
1456
1457/// Try to transform:
1458/// if () { *P = v1; } else { *P = v2 }
1459/// or:
1460/// *P = v1; if () { *P = v2; }
1461/// into a phi node with a store in the successor.
1462bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
1463 if (!SI.isUnordered())
1464 return false; // This code has not been audited for volatile/ordered case.
1465
1466 // Check if the successor block has exactly 2 incoming edges.
1467 BasicBlock *StoreBB = SI.getParent();
1468 BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1469 if (!DestBB->hasNPredecessors(2))
1470 return false;
1471
1472 // Capture the other block (the block that doesn't contain our store).
1473 pred_iterator PredIter = pred_begin(DestBB);
1474 if (*PredIter == StoreBB)
1475 ++PredIter;
1476 BasicBlock *OtherBB = *PredIter;
1477
1478 // Bail out if the relevant blocks aren't all distinct. This can happen,
1479 // for example, if SI is in an infinite loop.
1480 if (StoreBB == DestBB || OtherBB == DestBB)
1481 return false;
1482
1483 // Verify that the other block ends in a branch and is not otherwise empty.
1484 BasicBlock::iterator BBI(OtherBB->getTerminator());
1485 BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1486 if (!OtherBr || BBI == OtherBB->begin())
1487 return false;
1488
1489 auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
1490 if (!OtherStore ||
1491 OtherStore->getPointerOperand() != SI.getPointerOperand())
1492 return false;
1493
1494 auto *SIVTy = SI.getValueOperand()->getType();
1495 auto *OSVTy = OtherStore->getValueOperand()->getType();
1496 return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
1497 SI.hasSameSpecialState(OtherStore);
1498 };
1499
1500 // If the other block ends in an unconditional branch, check for the 'if then
1501 // else' case. There is an instruction before the branch.
1502 StoreInst *OtherStore = nullptr;
1503 if (OtherBr->isUnconditional()) {
1504 --BBI;
1505 // Skip over debugging info and pseudo probes.
1506 while (BBI->isDebugOrPseudoInst()) {
1507 if (BBI==OtherBB->begin())
1508 return false;
1509 --BBI;
1510 }
1511 // If this isn't a store, isn't a store to the same location, or is not the
1512 // right kind of store, bail out.
1513 OtherStore = dyn_cast<StoreInst>(BBI);
1514 if (!OtherStoreIsMergeable(OtherStore))
1515 return false;
1516 } else {
1517 // Otherwise, the other block ended with a conditional branch. If one of the
1518 // destinations is StoreBB, then we have the if/then case.
1519 if (OtherBr->getSuccessor(0) != StoreBB &&
1520 OtherBr->getSuccessor(1) != StoreBB)
1521 return false;
1522
1523 // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1524 // if/then triangle. See if there is a store to the same ptr as SI that
1525 // lives in OtherBB.
1526 for (;; --BBI) {
1527 // Check to see if we find the matching store.
1528 OtherStore = dyn_cast<StoreInst>(BBI);
1529 if (OtherStoreIsMergeable(OtherStore))
1530 break;
1531
1532 // If we find something that may be using or overwriting the stored
1533 // value, or if we run out of instructions, we can't do the transform.
1534 if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1535 BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1536 return false;
1537 }
1538
1539 // In order to eliminate the store in OtherBr, we have to make sure nothing
1540 // reads or overwrites the stored value in StoreBB.
1541 for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1542 // FIXME: This should really be AA driven.
1543 if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1544 return false;
1545 }
1546 }
1547
1548 // Insert a PHI node now if we need it.
1549 Value *MergedVal = OtherStore->getValueOperand();
1550 // The debug locations of the original instructions might differ. Merge them.
1551 DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1552 OtherStore->getDebugLoc());
1553 if (MergedVal != SI.getValueOperand()) {
1554 PHINode *PN =
1555 PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
1556 PN->addIncoming(SI.getValueOperand(), SI.getParent());
1557 Builder.SetInsertPoint(OtherStore);
1558 PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
1559 OtherBB);
1560 MergedVal = InsertNewInstBefore(PN, DestBB->begin());
1561 PN->setDebugLoc(MergedLoc);
1562 }
1563
1564 // Advance to a place where it is safe to insert the new store and insert it.
1565 BBI = DestBB->getFirstInsertionPt();
1566 StoreInst *NewSI =
1567 new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
1568 SI.getOrdering(), SI.getSyncScopeID());
1569 InsertNewInstBefore(NewSI, BBI);
1570 NewSI->setDebugLoc(MergedLoc);
1571 NewSI->mergeDIAssignID({&SI, OtherStore});
1572
1573 // If the two stores had AA tags, merge them.
1574 AAMDNodes AATags = SI.getAAMetadata();
1575 if (AATags)
1576 NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
1577
1578 // Nuke the old stores.
1579 eraseInstFromFunction(SI);
1580 eraseInstFromFunction(*OtherStore);
1581 return true;
1582}