Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 5e1a55e

Browse files
sdesmalen-arm authored and tru committed on Sep 10, 2024
[AArch64] Disable SVE paired ld1/st1 for callee-saves.
The functionality to make use of SVE's load/store pair instructions for the callee-saves is broken because the offsets used in the instructions are incorrect. This is addressed by #105518 but given the complexity of this code and the subtleties around calculating the right offsets, we favour disabling the behaviour altogether for LLVM 19. This fix is critical for any programs being compiled with `+sme2`.
1 parent 42f18ee commit 5e1a55e

File tree

5 files changed

+2036
-1217
lines changed

5 files changed

+2036
-1217
lines changed
 

‎llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 0 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2931,16 +2931,6 @@ struct RegPairInfo {
29312931

29322932
} // end anonymous namespace
29332933

2934-
unsigned findFreePredicateReg(BitVector &SavedRegs) {
2935-
for (unsigned PReg = AArch64::P8; PReg <= AArch64::P15; ++PReg) {
2936-
if (SavedRegs.test(PReg)) {
2937-
unsigned PNReg = PReg - AArch64::P0 + AArch64::PN0;
2938-
return PNReg;
2939-
}
2940-
}
2941-
return AArch64::NoRegister;
2942-
}
2943-
29442934
static void computeCalleeSaveRegisterPairs(
29452935
MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI,
29462936
const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
@@ -3645,7 +3635,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
36453635

36463636
unsigned ExtraCSSpill = 0;
36473637
bool HasUnpairedGPR64 = false;
3648-
bool HasPairZReg = false;
36493638
// Figure out which callee-saved registers to save/restore.
36503639
for (unsigned i = 0; CSRegs[i]; ++i) {
36513640
const unsigned Reg = CSRegs[i];
@@ -3699,28 +3688,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
36993688
!RegInfo->isReservedReg(MF, PairedReg))
37003689
ExtraCSSpill = PairedReg;
37013690
}
3702-
// Check if there is a pair of ZRegs, so it can select PReg for spill/fill
3703-
HasPairZReg |= (AArch64::ZPRRegClass.contains(Reg, CSRegs[i ^ 1]) &&
3704-
SavedRegs.test(CSRegs[i ^ 1]));
3705-
}
3706-
3707-
if (HasPairZReg && (Subtarget.hasSVE2p1() || Subtarget.hasSME2())) {
3708-
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
3709-
// Find a suitable predicate register for the multi-vector spill/fill
3710-
// instructions.
3711-
unsigned PnReg = findFreePredicateReg(SavedRegs);
3712-
if (PnReg != AArch64::NoRegister)
3713-
AFI->setPredicateRegForFillSpill(PnReg);
3714-
// If no free callee-save has been found assign one.
3715-
if (!AFI->getPredicateRegForFillSpill() &&
3716-
MF.getFunction().getCallingConv() ==
3717-
CallingConv::AArch64_SVE_VectorCall) {
3718-
SavedRegs.set(AArch64::P8);
3719-
AFI->setPredicateRegForFillSpill(AArch64::PN8);
3720-
}
3721-
3722-
assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) &&
3723-
"Predicate cannot be a reserved register");
37243691
}
37253692

37263693
if (MF.getFunction().getCallingConv() == CallingConv::Win64 &&

‎llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll

Lines changed: 66 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -329,27 +329,34 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
329329
; CHECK-NEXT: .cfi_offset w29, -32
330330
; CHECK-NEXT: addvl sp, sp, #-18
331331
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
332-
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
333-
; CHECK-NEXT: ptrue pn8.b
334332
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
335-
; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
336-
; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
337333
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
338-
; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
339-
; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
340334
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
341-
; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
342-
; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
343335
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
344-
; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
345336
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
346337
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
347338
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
339+
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
348340
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
349341
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
350342
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
351343
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
352-
; CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
344+
; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
345+
; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
346+
; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
347+
; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
348+
; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
349+
; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
350+
; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
351+
; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
352+
; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
353+
; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
354+
; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
355+
; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
356+
; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
357+
; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
358+
; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
359+
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
353360
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
354361
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
355362
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -371,16 +378,23 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
371378
; CHECK-NEXT: .cfi_restore vg
372379
; CHECK-NEXT: addvl sp, sp, #1
373380
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
374-
; CHECK-NEXT: ptrue pn8.b
381+
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
382+
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
383+
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
384+
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
385+
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
386+
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
387+
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
388+
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
389+
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
390+
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
391+
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
392+
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
393+
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
394+
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
395+
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
396+
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
375397
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
376-
; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
377-
; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
378-
; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
379-
; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
380-
; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
381-
; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
382-
; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
383-
; CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
384398
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
385399
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
386400
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -424,27 +438,34 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
424438
; FP-CHECK-NEXT: .cfi_offset w30, -40
425439
; FP-CHECK-NEXT: .cfi_offset w29, -48
426440
; FP-CHECK-NEXT: addvl sp, sp, #-18
427-
; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
428-
; FP-CHECK-NEXT: ptrue pn8.b
429441
; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
430-
; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
431-
; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
432442
; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
433-
; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
434-
; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
435443
; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
436-
; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
437-
; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
438444
; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
439-
; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
440445
; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
441446
; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
442447
; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
448+
; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
443449
; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
444450
; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
445451
; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
446452
; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
447-
; FP-CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
453+
; FP-CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
454+
; FP-CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
455+
; FP-CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
456+
; FP-CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
457+
; FP-CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
458+
; FP-CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
459+
; FP-CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
460+
; FP-CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
461+
; FP-CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
462+
; FP-CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
463+
; FP-CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
464+
; FP-CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
465+
; FP-CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
466+
; FP-CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
467+
; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
468+
; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
448469
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
449470
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
450471
; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
@@ -464,16 +485,23 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
464485
; FP-CHECK-NEXT: smstart sm
465486
; FP-CHECK-NEXT: .cfi_restore vg
466487
; FP-CHECK-NEXT: addvl sp, sp, #1
467-
; FP-CHECK-NEXT: ptrue pn8.b
488+
; FP-CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
489+
; FP-CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
490+
; FP-CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
491+
; FP-CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
492+
; FP-CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
493+
; FP-CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
494+
; FP-CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
495+
; FP-CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
496+
; FP-CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
497+
; FP-CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
498+
; FP-CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
499+
; FP-CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
500+
; FP-CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
501+
; FP-CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
502+
; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
503+
; FP-CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
468504
; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
469-
; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
470-
; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
471-
; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
472-
; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
473-
; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
474-
; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
475-
; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
476-
; FP-CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
477505
; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
478506
; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
479507
; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload

‎llvm/test/CodeGen/AArch64/sme2-intrinsics-ld1.ll

Lines changed: 944 additions & 544 deletions
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AArch64/sme2-intrinsics-ldnt1.ll

Lines changed: 944 additions & 544 deletions
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AArch64/sve-callee-save-restore-pairs.ll

Lines changed: 82 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -88,27 +88,34 @@ define void @fbyte(<vscale x 16 x i8> %v) {
8888
; PAIR: // %bb.0:
8989
; PAIR-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
9090
; PAIR-NEXT: addvl sp, sp, #-18
91-
; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
92-
; PAIR-NEXT: ptrue pn8.b
9391
; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
94-
; PAIR-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
95-
; PAIR-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
9692
; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
97-
; PAIR-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
98-
; PAIR-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
9993
; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
100-
; PAIR-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
101-
; PAIR-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
10294
; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
103-
; PAIR-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
10495
; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
10596
; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
10697
; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
98+
; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
10799
; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
108100
; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
109101
; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
110102
; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
111-
; PAIR-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
103+
; PAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
104+
; PAIR-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
105+
; PAIR-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
106+
; PAIR-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
107+
; PAIR-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
108+
; PAIR-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
109+
; PAIR-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
110+
; PAIR-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
111+
; PAIR-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
112+
; PAIR-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
113+
; PAIR-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
114+
; PAIR-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
115+
; PAIR-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
116+
; PAIR-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
117+
; PAIR-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
118+
; PAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
112119
; PAIR-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG
113120
; PAIR-NEXT: .cfi_offset w30, -8
114121
; PAIR-NEXT: .cfi_offset w29, -16
@@ -121,16 +128,23 @@ define void @fbyte(<vscale x 16 x i8> %v) {
121128
; PAIR-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
122129
; PAIR-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
123130
; PAIR-NEXT: bl my_func
124-
; PAIR-NEXT: ptrue pn8.b
131+
; PAIR-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
132+
; PAIR-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
133+
; PAIR-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
134+
; PAIR-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
135+
; PAIR-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
136+
; PAIR-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
137+
; PAIR-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
138+
; PAIR-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
139+
; PAIR-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
140+
; PAIR-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
141+
; PAIR-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
142+
; PAIR-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
143+
; PAIR-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
144+
; PAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
145+
; PAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
146+
; PAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
125147
; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
126-
; PAIR-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
127-
; PAIR-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
128-
; PAIR-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
129-
; PAIR-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
130-
; PAIR-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
131-
; PAIR-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
132-
; PAIR-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
133-
; PAIR-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
134148
; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
135149
; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
136150
; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -230,27 +244,34 @@ define void @fhalf(<vscale x 8 x half> %v) {
230244
; PAIR: // %bb.0:
231245
; PAIR-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
232246
; PAIR-NEXT: addvl sp, sp, #-18
233-
; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
234-
; PAIR-NEXT: ptrue pn8.b
235247
; PAIR-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
236-
; PAIR-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
237-
; PAIR-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8, mul vl] // 32-byte Folded Spill
238248
; PAIR-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
239-
; PAIR-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12, mul vl] // 32-byte Folded Spill
240-
; PAIR-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16, mul vl] // 32-byte Folded Spill
241249
; PAIR-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
242-
; PAIR-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20, mul vl] // 32-byte Folded Spill
243-
; PAIR-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24, mul vl] // 32-byte Folded Spill
244250
; PAIR-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
245-
; PAIR-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28, mul vl] // 32-byte Folded Spill
246251
; PAIR-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
247252
; PAIR-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
248253
; PAIR-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
254+
; PAIR-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
249255
; PAIR-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
250256
; PAIR-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
251257
; PAIR-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
252258
; PAIR-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
253-
; PAIR-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
259+
; PAIR-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
260+
; PAIR-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
261+
; PAIR-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
262+
; PAIR-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
263+
; PAIR-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
264+
; PAIR-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
265+
; PAIR-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
266+
; PAIR-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
267+
; PAIR-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
268+
; PAIR-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
269+
; PAIR-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
270+
; PAIR-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
271+
; PAIR-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
272+
; PAIR-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
273+
; PAIR-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
274+
; PAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
254275
; PAIR-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 144 * VG
255276
; PAIR-NEXT: .cfi_offset w30, -8
256277
; PAIR-NEXT: .cfi_offset w29, -16
@@ -263,16 +284,23 @@ define void @fhalf(<vscale x 8 x half> %v) {
263284
; PAIR-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
264285
; PAIR-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
265286
; PAIR-NEXT: bl my_func
266-
; PAIR-NEXT: ptrue pn8.b
287+
; PAIR-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
288+
; PAIR-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
289+
; PAIR-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
290+
; PAIR-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
291+
; PAIR-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
292+
; PAIR-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
293+
; PAIR-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
294+
; PAIR-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
295+
; PAIR-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
296+
; PAIR-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
297+
; PAIR-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
298+
; PAIR-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
299+
; PAIR-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
300+
; PAIR-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
301+
; PAIR-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
302+
; PAIR-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
267303
; PAIR-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
268-
; PAIR-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
269-
; PAIR-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
270-
; PAIR-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
271-
; PAIR-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
272-
; PAIR-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
273-
; PAIR-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
274-
; PAIR-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
275-
; PAIR-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
276304
; PAIR-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
277305
; PAIR-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
278306
; PAIR-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -323,23 +351,21 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs() {
323351
; PAIR: // %bb.0:
324352
; PAIR-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
325353
; PAIR-NEXT: addvl sp, sp, #-4
326-
; PAIR-NEXT: str p8, [sp, #5, mul vl] // 2-byte Folded Spill
327-
; PAIR-NEXT: ptrue pn8.b
328354
; PAIR-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
329355
; PAIR-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
330356
; PAIR-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill
331-
; PAIR-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #4, mul vl] // 32-byte Folded Spill
357+
; PAIR-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill
358+
; PAIR-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill
332359
; PAIR-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
333360
; PAIR-NEXT: .cfi_offset w29, -16
334361
; PAIR-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
335362
; PAIR-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
336363
; PAIR-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
337364
; PAIR-NEXT: //APP
338365
; PAIR-NEXT: //NO_APP
339-
; PAIR-NEXT: ptrue pn8.b
340366
; PAIR-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload
341-
; PAIR-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
342-
; PAIR-NEXT: ldr p8, [sp, #5, mul vl] // 2-byte Folded Reload
367+
; PAIR-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload
368+
; PAIR-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload
343369
; PAIR-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
344370
; PAIR-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
345371
; PAIR-NEXT: addvl sp, sp, #4
@@ -381,22 +407,22 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_p_regs2() {
381407
; PAIR: // %bb.0:
382408
; PAIR-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
383409
; PAIR-NEXT: addvl sp, sp, #-4
384-
; PAIR-NEXT: str p9, [sp, #7, mul vl] // 2-byte Folded Spill
385-
; PAIR-NEXT: ptrue pn9.b
386410
; PAIR-NEXT: str p10, [sp, #6, mul vl] // 2-byte Folded Spill
411+
; PAIR-NEXT: str p9, [sp, #7, mul vl] // 2-byte Folded Spill
387412
; PAIR-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill
388-
; PAIR-NEXT: st1b { z8.b, z9.b }, pn9, [sp, #4, mul vl] // 32-byte Folded Spill
413+
; PAIR-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill
414+
; PAIR-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill
389415
; PAIR-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
390416
; PAIR-NEXT: .cfi_offset w29, -16
391417
; PAIR-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
392418
; PAIR-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
393419
; PAIR-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
394420
; PAIR-NEXT: //APP
395421
; PAIR-NEXT: //NO_APP
396-
; PAIR-NEXT: ptrue pn9.b
397422
; PAIR-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload
423+
; PAIR-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload
424+
; PAIR-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload
398425
; PAIR-NEXT: ldr p10, [sp, #6, mul vl] // 2-byte Folded Reload
399-
; PAIR-NEXT: ld1b { z8.b, z9.b }, pn9/z, [sp, #4, mul vl] // 32-byte Folded Reload
400426
; PAIR-NEXT: ldr p9, [sp, #7, mul vl] // 2-byte Folded Reload
401427
; PAIR-NEXT: addvl sp, sp, #4
402428
; PAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -429,20 +455,18 @@ define aarch64_sve_vector_pcs void @test_clobbers_z_regs() {
429455
; PAIR-LABEL: test_clobbers_z_regs:
430456
; PAIR: // %bb.0:
431457
; PAIR-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
432-
; PAIR-NEXT: addvl sp, sp, #-3
433-
; PAIR-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill
434-
; PAIR-NEXT: ptrue pn8.b
435-
; PAIR-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #2, mul vl] // 32-byte Folded Spill
436-
; PAIR-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
458+
; PAIR-NEXT: addvl sp, sp, #-2
459+
; PAIR-NEXT: str z9, [sp] // 16-byte Folded Spill
460+
; PAIR-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill
461+
; PAIR-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
437462
; PAIR-NEXT: .cfi_offset w29, -16
438463
; PAIR-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
439464
; PAIR-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
440465
; PAIR-NEXT: //APP
441466
; PAIR-NEXT: //NO_APP
442-
; PAIR-NEXT: ptrue pn8.b
443-
; PAIR-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
444-
; PAIR-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
445-
; PAIR-NEXT: addvl sp, sp, #3
467+
; PAIR-NEXT: ldr z9, [sp] // 16-byte Folded Reload
468+
; PAIR-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
469+
; PAIR-NEXT: addvl sp, sp, #2
446470
; PAIR-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
447471
; PAIR-NEXT: ret
448472
call void asm sideeffect "", "~{z8},~{z9}"()

0 commit comments

Comments
 (0)
Please sign in to comment.