Skip to content

[CodeGen] Combine two loops in SlotIndexes.cpp file #127631

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions llvm/lib/CodeGen/SlotIndexes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
IndexList::iterator ListI = endIdx.listEntry()->getIterator();
MachineBasicBlock::iterator MBBI = End;
bool pastStart = false;
bool OldIndexesRemoved = false;
while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
assert(ListI->getIndex() >= startIdx.getIndex() &&
(includeStart || !pastStart) &&
Expand All @@ -220,32 +221,40 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
MachineInstr *SlotMI = ListI->getInstr();
MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? &*MBBI : nullptr;
bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
bool MIIndexNotFound = MI && !mi2iMap.contains(MI);
bool SlotMIRemoved = false;

if (SlotMI == MI && !MBBIAtBegin) {
--ListI;
if (MBBI != Begin)
--MBBI;
else
pastStart = true;
} else if (MI && !mi2iMap.contains(MI)) {
} else if (MIIndexNotFound || OldIndexesRemoved) {
if (MBBI != Begin)
--MBBI;
else
pastStart = true;
} else {
--ListI;
if (SlotMI)
// We have run through all the indexes in the interval, so the only
// thing left is to go through the remaining MBB instructions and
// update their indexes.
if (ListI == ListB)
OldIndexesRemoved = true;
else
--ListI;
if (SlotMI) {
removeMachineInstrFromMaps(*SlotMI);
SlotMIRemoved = true;
}
}
}

// In theory this could be combined with the previous loop, but it is tricky
// to update the IndexList while we are iterating it.
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
if (!MI.isDebugOrPseudoInstr() && !mi2iMap.contains(&MI))
insertMachineInstrInMaps(MI);
MachineInstr *InstrToInsert = SlotMIRemoved ? SlotMI : MI;

// Insert instruction back into the maps after passing it/removing the index
if ((MIIndexNotFound || SlotMIRemoved) && InstrToInsert->getParent() &&
!InstrToInsert->isDebugOrPseudoInstr())
insertMachineInstrInMaps(*InstrToInsert);
}
}

Expand Down
198 changes: 132 additions & 66 deletions llvm/test/CodeGen/Thumb2/mve-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -223,18 +223,31 @@ entry:
}

define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
; CHECK-LABEL: shuffle3_i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovx.f16 s5, s3
; CHECK-NEXT: vmovx.f16 s6, s1
; CHECK-NEXT: vmovx.f16 s4, s0
; CHECK-NEXT: vins.f16 s1, s0
; CHECK-NEXT: vins.f16 s6, s4
; CHECK-NEXT: vins.f16 s5, s3
; CHECK-NEXT: vmov.f32 s4, s2
; CHECK-NEXT: vmov.f32 s7, s1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle3_i16:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: vmovx.f16 s5, s3
; CHECK-LV-NEXT: vmovx.f16 s6, s1
; CHECK-LV-NEXT: vmovx.f16 s4, s0
; CHECK-LV-NEXT: vins.f16 s1, s0
; CHECK-LV-NEXT: vins.f16 s6, s4
; CHECK-LV-NEXT: vins.f16 s5, s3
; CHECK-LV-NEXT: vmov.f32 s4, s2
; CHECK-LV-NEXT: vmov.f32 s7, s1
; CHECK-LV-NEXT: vmov q0, q1
; CHECK-LV-NEXT: bx lr

; CHECK-LIS-LABEL: shuffle3_i16:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: vmov q1, q0
; CHECK-LIS-NEXT: vmovx.f16 s2, s5
; CHECK-LIS-NEXT: vmovx.f16 s0, s4
; CHECK-LIS-NEXT: vins.f16 s5, s4
; CHECK-LIS-NEXT: vins.f16 s2, s0
; CHECK-LIS-NEXT: vmov.f32 s0, s6
; CHECK-LIS-NEXT: vmovx.f16 s1, s7
; CHECK-LIS-NEXT: vmov.f32 s3, s5
; CHECK-LIS-NEXT: vins.f16 s1, s7
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
ret <8 x i16> %out
Expand Down Expand Up @@ -1145,18 +1158,31 @@ entry:
}

define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
; CHECK-LABEL: shuffle3_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovx.f16 s5, s3
; CHECK-NEXT: vmovx.f16 s6, s1
; CHECK-NEXT: vmovx.f16 s4, s0
; CHECK-NEXT: vins.f16 s1, s0
; CHECK-NEXT: vins.f16 s6, s4
; CHECK-NEXT: vins.f16 s5, s3
; CHECK-NEXT: vmov.f32 s4, s2
; CHECK-NEXT: vmov.f32 s7, s1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle3_f16:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: vmovx.f16 s5, s3
; CHECK-LV-NEXT: vmovx.f16 s6, s1
; CHECK-LV-NEXT: vmovx.f16 s4, s0
; CHECK-LV-NEXT: vins.f16 s1, s0
; CHECK-LV-NEXT: vins.f16 s6, s4
; CHECK-LV-NEXT: vins.f16 s5, s3
; CHECK-LV-NEXT: vmov.f32 s4, s2
; CHECK-LV-NEXT: vmov.f32 s7, s1
; CHECK-LV-NEXT: vmov q0, q1
; CHECK-LV-NEXT: bx lr

; CHECK-LIS-LABEL: shuffle3_f16:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: vmov q1, q0
; CHECK-LIS-NEXT: vmovx.f16 s2, s5
; CHECK-LIS-NEXT: vmovx.f16 s0, s4
; CHECK-LIS-NEXT: vins.f16 s5, s4
; CHECK-LIS-NEXT: vins.f16 s2, s0
; CHECK-LIS-NEXT: vmov.f32 s0, s6
; CHECK-LIS-NEXT: vmovx.f16 s1, s7
; CHECK-LIS-NEXT: vmov.f32 s3, s5
; CHECK-LIS-NEXT: vins.f16 s1, s7
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
ret <8 x half> %out
Expand Down Expand Up @@ -1467,27 +1493,47 @@ entry:
ret <2 x double> %out
}
define arm_aapcs_vfpcc <8 x double> @shuffle9_f64(<4 x double> %src1, <4 x double> %src2) {
; CHECK-LABEL: shuffle9_f64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q5, q2
; CHECK-NEXT: vmov.f32 s16, s0
; CHECK-NEXT: vmov.f32 s18, s20
; CHECK-NEXT: vmov.f32 s20, s2
; CHECK-NEXT: vmov.f32 s10, s12
; CHECK-NEXT: vmov.f32 s19, s21
; CHECK-NEXT: vmov.f32 s8, s4
; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vmov.f32 s21, s3
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vmov.f32 s12, s6
; CHECK-NEXT: vmov.f32 s11, s13
; CHECK-NEXT: vmov.f32 s9, s5
; CHECK-NEXT: vmov.f32 s13, s7
; CHECK-NEXT: vmov q1, q5
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle9_f64:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
; CHECK-LV-NEXT: vmov q5, q2
; CHECK-LV-NEXT: vmov.f32 s16, s0
; CHECK-LV-NEXT: vmov.f32 s18, s20
; CHECK-LV-NEXT: vmov.f32 s20, s2
; CHECK-LV-NEXT: vmov.f32 s10, s12
; CHECK-LV-NEXT: vmov.f32 s19, s21
; CHECK-LV-NEXT: vmov.f32 s8, s4
; CHECK-LV-NEXT: vmov.f32 s17, s1
; CHECK-LV-NEXT: vmov.f32 s21, s3
; CHECK-LV-NEXT: vmov q0, q4
; CHECK-LV-NEXT: vmov.f32 s12, s6
; CHECK-LV-NEXT: vmov.f32 s11, s13
; CHECK-LV-NEXT: vmov.f32 s9, s5
; CHECK-LV-NEXT: vmov.f32 s13, s7
; CHECK-LV-NEXT: vmov q1, q5
; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
; CHECK-LV-NEXT: bx lr

; CHECK-LIS-LABEL: shuffle9_f64:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
; CHECK-LIS-NEXT: vmov q5, q2
; CHECK-LIS-NEXT: vmov q4, q0
; CHECK-LIS-NEXT: vmov.f32 s2, s20
; CHECK-LIS-NEXT: vmov.f32 s20, s18
; CHECK-LIS-NEXT: vmov.f32 s10, s12
; CHECK-LIS-NEXT: vmov.f32 s3, s21
; CHECK-LIS-NEXT: vmov.f32 s8, s4
; CHECK-LIS-NEXT: vmov.f32 s21, s19
; CHECK-LIS-NEXT: vmov.f32 s12, s6
; CHECK-LIS-NEXT: vmov.f32 s11, s13
; CHECK-LIS-NEXT: vmov.f32 s9, s5
; CHECK-LIS-NEXT: vmov.f32 s13, s7
; CHECK-LIS-NEXT: vmov q1, q5
; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <4 x double> %src1, <4 x double> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x double> %out
Expand Down Expand Up @@ -1560,27 +1606,47 @@ entry:
ret <2 x i64> %out
}
define arm_aapcs_vfpcc <8 x i64> @shuffle9_i64(<4 x i64> %src1, <4 x i64> %src2) {
; CHECK-LABEL: shuffle9_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov q5, q2
; CHECK-NEXT: vmov.f32 s16, s0
; CHECK-NEXT: vmov.f32 s18, s20
; CHECK-NEXT: vmov.f32 s20, s2
; CHECK-NEXT: vmov.f32 s10, s12
; CHECK-NEXT: vmov.f32 s19, s21
; CHECK-NEXT: vmov.f32 s8, s4
; CHECK-NEXT: vmov.f32 s17, s1
; CHECK-NEXT: vmov.f32 s21, s3
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vmov.f32 s12, s6
; CHECK-NEXT: vmov.f32 s11, s13
; CHECK-NEXT: vmov.f32 s9, s5
; CHECK-NEXT: vmov.f32 s13, s7
; CHECK-NEXT: vmov q1, q5
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
; CHECK-LV-LABEL: shuffle9_i64:
; CHECK-LV: @ %bb.0: @ %entry
; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
; CHECK-LV-NEXT: vmov q5, q2
; CHECK-LV-NEXT: vmov.f32 s16, s0
; CHECK-LV-NEXT: vmov.f32 s18, s20
; CHECK-LV-NEXT: vmov.f32 s20, s2
; CHECK-LV-NEXT: vmov.f32 s10, s12
; CHECK-LV-NEXT: vmov.f32 s19, s21
; CHECK-LV-NEXT: vmov.f32 s8, s4
; CHECK-LV-NEXT: vmov.f32 s17, s1
; CHECK-LV-NEXT: vmov.f32 s21, s3
; CHECK-LV-NEXT: vmov q0, q4
; CHECK-LV-NEXT: vmov.f32 s12, s6
; CHECK-LV-NEXT: vmov.f32 s11, s13
; CHECK-LV-NEXT: vmov.f32 s9, s5
; CHECK-LV-NEXT: vmov.f32 s13, s7
; CHECK-LV-NEXT: vmov q1, q5
; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
; CHECK-LV-NEXT: bx lr

; CHECK-LIS-LABEL: shuffle9_i64:
; CHECK-LIS: @ %bb.0: @ %entry
; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
; CHECK-LIS-NEXT: vmov q5, q2
; CHECK-LIS-NEXT: vmov q4, q0
; CHECK-LIS-NEXT: vmov.f32 s2, s20
; CHECK-LIS-NEXT: vmov.f32 s20, s18
; CHECK-LIS-NEXT: vmov.f32 s10, s12
; CHECK-LIS-NEXT: vmov.f32 s3, s21
; CHECK-LIS-NEXT: vmov.f32 s8, s4
; CHECK-LIS-NEXT: vmov.f32 s21, s19
; CHECK-LIS-NEXT: vmov.f32 s12, s6
; CHECK-LIS-NEXT: vmov.f32 s11, s13
; CHECK-LIS-NEXT: vmov.f32 s9, s5
; CHECK-LIS-NEXT: vmov.f32 s13, s7
; CHECK-LIS-NEXT: vmov q1, q5
; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
; CHECK-LIS-NEXT: bx lr
entry:
%out = shufflevector <4 x i64> %src1, <4 x i64> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x i64> %out
Expand Down
Loading