diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f97386159d029..bb85f8be92163 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -20856,9 +20856,16 @@ namespace { /// A group of stores that we'll try to bundle together using vector ops. /// They are ordered using the signed distance of their address operand to the /// address of this group's BaseInstr. -struct RelatedStoreInsts { - RelatedStoreInsts(unsigned BaseInstrIdx) { reset(BaseInstrIdx); } +class RelatedStoreInsts { +public: + RelatedStoreInsts(unsigned BaseInstrIdx, ArrayRef AllStores) + : AllStores(AllStores) { + reset(BaseInstrIdx); + } + void reset(unsigned NewBaseInstr) { + assert(NewBaseInstr < AllStores.size() && + "Instruction index out of bounds"); BaseInstrIdx = NewBaseInstr; Instrs.clear(); insertOrLookup(NewBaseInstr, 0); @@ -20873,12 +20880,58 @@ struct RelatedStoreInsts { return Inserted ? std::nullopt : std::optional(It->second); } + using DistToInstMap = std::map; + const DistToInstMap &getStores() const { return Instrs; } + + /// If \p SI is related to this group of stores, return the distance of its + /// pointer operand to the one the group's BaseInstr. + std::optional getPointerDiff(StoreInst &SI, const DataLayout &DL, + ScalarEvolution &SE) const { + StoreInst &BaseStore = *AllStores[BaseInstrIdx]; + return getPointersDiff( + BaseStore.getValueOperand()->getType(), BaseStore.getPointerOperand(), + SI.getValueOperand()->getType(), SI.getPointerOperand(), DL, SE, + /*StrictCheck=*/true); + } + + /// Recompute the pointer distances to be based on \p NewBaseInstIdx. + /// Stores whose index is less than \p MinSafeIdx will be dropped. + void rebase(unsigned MinSafeIdx, unsigned NewBaseInstIdx, + int DistFromCurBase) { + DistToInstMap PrevSet = std::move(Instrs); + reset(NewBaseInstIdx); + + // Re-insert stores that come after MinSafeIdx to try and vectorize them + // again. Their distance will be "rebased" to use NewBaseInstIdx as + // reference. + for (auto [Dist, InstIdx] : PrevSet) { + if (InstIdx >= MinSafeIdx) + insertOrLookup(InstIdx, Dist - DistFromCurBase); + } + } + + /// Remove all stores that have been vectorized from this group. + void clearVectorizedStores(const BoUpSLP::ValueSet &VectorizedStores) { + DistToInstMap::reverse_iterator LastVectorizedStore = find_if( + reverse(Instrs), [&](const std::pair &DistAndIdx) { + return VectorizedStores.contains(AllStores[DistAndIdx.second]); + }); + + // Get a forward iterator pointing after the last vectorized store and erase + // all stores before it so we don't try to vectorize them again. + DistToInstMap::iterator VectorizedStoresEnd = LastVectorizedStore.base(); + Instrs.erase(Instrs.begin(), VectorizedStoresEnd); + } + +private: /// The index of the Base instruction, i.e. the one with a 0 pointer distance. unsigned BaseInstrIdx; /// Maps a pointer distance from \p BaseInstrIdx to an instruction index. - using DistToInstMap = std::map; DistToInstMap Instrs; + + /// Reference to all the stores in the BB being analyzed. + ArrayRef AllStores; }; } // end anonymous namespace @@ -21166,14 +21219,7 @@ bool SLPVectorizerPass::vectorizeStores( } }; - // Stores pair (first: index of the store into Stores array ref, address of - // which taken as base, second: sorted set of pairs {index, dist}, which are - // indices of stores in the set and their store location distances relative to - // the base address). - - // Need to store the index of the very first store separately, since the set - // may be reordered after the insertion and the first store may be moved. This - // container allows to reduce number of calls of getPointersDiff() function. + /// Groups of stores to vectorize SmallVector SortedStores; // Inserts the specified store SI with the given index Idx to the set of the @@ -21209,52 +21255,30 @@ bool SLPVectorizerPass::vectorizeStores( // dependencies and no need to waste compile time to try to vectorize them. // - Try to vectorize the sequence {1, {1, 0}, {3, 2}}. auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) { - for (RelatedStoreInsts &StoreSeq : SortedStores) { - std::optional Diff = getPointersDiff( - Stores[StoreSeq.BaseInstrIdx]->getValueOperand()->getType(), - Stores[StoreSeq.BaseInstrIdx]->getPointerOperand(), - SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE, - /*StrictCheck=*/true); - if (!Diff) - continue; - std::optional PrevInst = - StoreSeq.insertOrLookup(/*InstrIdx=*/Idx, /*PtrDist=*/*Diff); - if (!PrevInst) { - // No store was associated to that distance. Keep collecting. - return; - } - // Try to vectorize the first found set to avoid duplicate analysis. - TryToVectorize(StoreSeq.Instrs); - RelatedStoreInsts::DistToInstMap PrevSet; - copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()), - [&](const std::pair &DistAndIdx) { - return DistAndIdx.second > *PrevInst; - }); - StoreSeq.reset(Idx); - // Insert stores that followed previous match to try to vectorize them - // with this store. - unsigned StartIdx = *PrevInst + 1; - SmallBitVector UsedStores(Idx - StartIdx); - // Distances to previously found dup store (or this store, since they - // store to the same addresses). - SmallVector Dists(Idx - StartIdx, 0); - for (auto [PtrDist, InstIdx] : reverse(PrevSet)) { - // Do not try to vectorize sequences, we already tried. - if (VectorizedStores.contains(Stores[InstIdx])) - break; - unsigned BI = InstIdx - StartIdx; - UsedStores.set(BI); - Dists[BI] = PtrDist - *Diff; - } - for (unsigned I = StartIdx; I < Idx; ++I) { - unsigned BI = I - StartIdx; - if (UsedStores.test(BI)) - StoreSeq.insertOrLookup(I, Dists[BI]); - } + std::optional PtrDist; + auto *RelatedStores = find_if( + SortedStores, [&PtrDist, SI, this](const RelatedStoreInsts &StoreSeq) { + PtrDist = StoreSeq.getPointerDiff(*SI, *DL, *SE); + return PtrDist.has_value(); + }); + + // We did not find a comparable store, start a new group. + if (RelatedStores == SortedStores.end()) { + SortedStores.emplace_back(Idx, Stores); return; } - // We did not find a comparable store, start a new sequence. - SortedStores.emplace_back(Idx); + + // If there is already a store in the group with the same PtrDiff, try to + // vectorize the existing instructions before adding the current store. + // Otherwise, insert this store and keep collecting. + if (std::optional PrevInst = + RelatedStores->insertOrLookup(Idx, *PtrDist)) { + TryToVectorize(RelatedStores->getStores()); + RelatedStores->clearVectorizedStores(VectorizedStores); + RelatedStores->rebase(/*MinSafeIdx=*/*PrevInst + 1, + /*NewBaseInstIdx=*/Idx, + /*DistFromCurBase=*/*PtrDist); + } }; Type *PrevValTy = nullptr; for (auto [I, SI] : enumerate(Stores)) { @@ -21265,7 +21289,7 @@ bool SLPVectorizerPass::vectorizeStores( // Check that we do not try to vectorize stores of different types. if (PrevValTy != SI->getValueOperand()->getType()) { for (RelatedStoreInsts &StoreSeq : SortedStores) - TryToVectorize(StoreSeq.Instrs); + TryToVectorize(StoreSeq.getStores()); SortedStores.clear(); PrevValTy = SI->getValueOperand()->getType(); } @@ -21274,7 +21298,7 @@ bool SLPVectorizerPass::vectorizeStores( // Final vectorization attempt. for (RelatedStoreInsts &StoreSeq : SortedStores) - TryToVectorize(StoreSeq.Instrs); + TryToVectorize(StoreSeq.getStores()); return Changed; }