Skip to content

Commit 4a0212b

Browse files
authored
[CodeGen] Combine two loops in SlotIndexes.cpp file (#127631)
Merged two loops that were iterating over the same machine basic block into one. Also made some minor readability improvements (renaming variables, adding comments, and absorbing an if condition into a variable).
1 parent ca0850f commit 4a0212b

File tree

3 files changed

+964
-484
lines changed

3 files changed

+964
-484
lines changed

llvm/lib/CodeGen/SlotIndexes.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
212212
IndexList::iterator ListI = endIdx.listEntry()->getIterator();
213213
MachineBasicBlock::iterator MBBI = End;
214214
bool pastStart = false;
215+
bool OldIndexesRemoved = false;
215216
while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
216217
assert(ListI->getIndex() >= startIdx.getIndex() &&
217218
(includeStart || !pastStart) &&
@@ -220,32 +221,40 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
220221
MachineInstr *SlotMI = ListI->getInstr();
221222
MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? &*MBBI : nullptr;
222223
bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
224+
bool MIIndexNotFound = MI && !mi2iMap.contains(MI);
225+
bool SlotMIRemoved = false;
223226

224227
if (SlotMI == MI && !MBBIAtBegin) {
225228
--ListI;
226229
if (MBBI != Begin)
227230
--MBBI;
228231
else
229232
pastStart = true;
230-
} else if (MI && !mi2iMap.contains(MI)) {
233+
} else if (MIIndexNotFound || OldIndexesRemoved) {
231234
if (MBBI != Begin)
232235
--MBBI;
233236
else
234237
pastStart = true;
235238
} else {
236-
--ListI;
237-
if (SlotMI)
239+
// We ran through all the indexes on the interval
240+
// -> The only thing left is to go through all the
241+
// remaining MBB instructions and update their indexes
242+
if (ListI == ListB)
243+
OldIndexesRemoved = true;
244+
else
245+
--ListI;
246+
if (SlotMI) {
238247
removeMachineInstrFromMaps(*SlotMI);
248+
SlotMIRemoved = true;
249+
}
239250
}
240-
}
241251

242-
// In theory this could be combined with the previous loop, but it is tricky
243-
// to update the IndexList while we are iterating it.
244-
for (MachineBasicBlock::iterator I = End; I != Begin;) {
245-
--I;
246-
MachineInstr &MI = *I;
247-
if (!MI.isDebugOrPseudoInstr() && !mi2iMap.contains(&MI))
248-
insertMachineInstrInMaps(MI);
252+
MachineInstr *InstrToInsert = SlotMIRemoved ? SlotMI : MI;
253+
254+
// Insert instruction back into the maps after passing it/removing the index
255+
if ((MIIndexNotFound || SlotMIRemoved) && InstrToInsert->getParent() &&
256+
!InstrToInsert->isDebugOrPseudoInstr())
257+
insertMachineInstrInMaps(*InstrToInsert);
249258
}
250259
}
251260

llvm/test/CodeGen/Thumb2/mve-shuffle.ll

Lines changed: 132 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -223,18 +223,31 @@ entry:
223223
}
224224

225225
define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
226-
; CHECK-LABEL: shuffle3_i16:
227-
; CHECK: @ %bb.0: @ %entry
228-
; CHECK-NEXT: vmovx.f16 s5, s3
229-
; CHECK-NEXT: vmovx.f16 s6, s1
230-
; CHECK-NEXT: vmovx.f16 s4, s0
231-
; CHECK-NEXT: vins.f16 s1, s0
232-
; CHECK-NEXT: vins.f16 s6, s4
233-
; CHECK-NEXT: vins.f16 s5, s3
234-
; CHECK-NEXT: vmov.f32 s4, s2
235-
; CHECK-NEXT: vmov.f32 s7, s1
236-
; CHECK-NEXT: vmov q0, q1
237-
; CHECK-NEXT: bx lr
226+
; CHECK-LV-LABEL: shuffle3_i16:
227+
; CHECK-LV: @ %bb.0: @ %entry
228+
; CHECK-LV-NEXT: vmovx.f16 s5, s3
229+
; CHECK-LV-NEXT: vmovx.f16 s6, s1
230+
; CHECK-LV-NEXT: vmovx.f16 s4, s0
231+
; CHECK-LV-NEXT: vins.f16 s1, s0
232+
; CHECK-LV-NEXT: vins.f16 s6, s4
233+
; CHECK-LV-NEXT: vins.f16 s5, s3
234+
; CHECK-LV-NEXT: vmov.f32 s4, s2
235+
; CHECK-LV-NEXT: vmov.f32 s7, s1
236+
; CHECK-LV-NEXT: vmov q0, q1
237+
; CHECK-LV-NEXT: bx lr
238+
239+
; CHECK-LIS-LABEL: shuffle3_i16:
240+
; CHECK-LIS: @ %bb.0: @ %entry
241+
; CHECK-LIS-NEXT: vmov q1, q0
242+
; CHECK-LIS-NEXT: vmovx.f16 s2, s5
243+
; CHECK-LIS-NEXT: vmovx.f16 s0, s4
244+
; CHECK-LIS-NEXT: vins.f16 s5, s4
245+
; CHECK-LIS-NEXT: vins.f16 s2, s0
246+
; CHECK-LIS-NEXT: vmov.f32 s0, s6
247+
; CHECK-LIS-NEXT: vmovx.f16 s1, s7
248+
; CHECK-LIS-NEXT: vmov.f32 s3, s5
249+
; CHECK-LIS-NEXT: vins.f16 s1, s7
250+
; CHECK-LIS-NEXT: bx lr
238251
entry:
239252
%out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
240253
ret <8 x i16> %out
@@ -1145,18 +1158,31 @@ entry:
11451158
}
11461159

11471160
define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
1148-
; CHECK-LABEL: shuffle3_f16:
1149-
; CHECK: @ %bb.0: @ %entry
1150-
; CHECK-NEXT: vmovx.f16 s5, s3
1151-
; CHECK-NEXT: vmovx.f16 s6, s1
1152-
; CHECK-NEXT: vmovx.f16 s4, s0
1153-
; CHECK-NEXT: vins.f16 s1, s0
1154-
; CHECK-NEXT: vins.f16 s6, s4
1155-
; CHECK-NEXT: vins.f16 s5, s3
1156-
; CHECK-NEXT: vmov.f32 s4, s2
1157-
; CHECK-NEXT: vmov.f32 s7, s1
1158-
; CHECK-NEXT: vmov q0, q1
1159-
; CHECK-NEXT: bx lr
1161+
; CHECK-LV-LABEL: shuffle3_f16:
1162+
; CHECK-LV: @ %bb.0: @ %entry
1163+
; CHECK-LV-NEXT: vmovx.f16 s5, s3
1164+
; CHECK-LV-NEXT: vmovx.f16 s6, s1
1165+
; CHECK-LV-NEXT: vmovx.f16 s4, s0
1166+
; CHECK-LV-NEXT: vins.f16 s1, s0
1167+
; CHECK-LV-NEXT: vins.f16 s6, s4
1168+
; CHECK-LV-NEXT: vins.f16 s5, s3
1169+
; CHECK-LV-NEXT: vmov.f32 s4, s2
1170+
; CHECK-LV-NEXT: vmov.f32 s7, s1
1171+
; CHECK-LV-NEXT: vmov q0, q1
1172+
; CHECK-LV-NEXT: bx lr
1173+
1174+
; CHECK-LIS-LABEL: shuffle3_f16:
1175+
; CHECK-LIS: @ %bb.0: @ %entry
1176+
; CHECK-LIS-NEXT: vmov q1, q0
1177+
; CHECK-LIS-NEXT: vmovx.f16 s2, s5
1178+
; CHECK-LIS-NEXT: vmovx.f16 s0, s4
1179+
; CHECK-LIS-NEXT: vins.f16 s5, s4
1180+
; CHECK-LIS-NEXT: vins.f16 s2, s0
1181+
; CHECK-LIS-NEXT: vmov.f32 s0, s6
1182+
; CHECK-LIS-NEXT: vmovx.f16 s1, s7
1183+
; CHECK-LIS-NEXT: vmov.f32 s3, s5
1184+
; CHECK-LIS-NEXT: vins.f16 s1, s7
1185+
; CHECK-LIS-NEXT: bx lr
11601186
entry:
11611187
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
11621188
ret <8 x half> %out
@@ -1467,27 +1493,47 @@ entry:
14671493
ret <2 x double> %out
14681494
}
14691495
define arm_aapcs_vfpcc <8 x double> @shuffle9_f64(<4 x double> %src1, <4 x double> %src2) {
1470-
; CHECK-LABEL: shuffle9_f64:
1471-
; CHECK: @ %bb.0: @ %entry
1472-
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1473-
; CHECK-NEXT: vpush {d8, d9, d10, d11}
1474-
; CHECK-NEXT: vmov q5, q2
1475-
; CHECK-NEXT: vmov.f32 s16, s0
1476-
; CHECK-NEXT: vmov.f32 s18, s20
1477-
; CHECK-NEXT: vmov.f32 s20, s2
1478-
; CHECK-NEXT: vmov.f32 s10, s12
1479-
; CHECK-NEXT: vmov.f32 s19, s21
1480-
; CHECK-NEXT: vmov.f32 s8, s4
1481-
; CHECK-NEXT: vmov.f32 s17, s1
1482-
; CHECK-NEXT: vmov.f32 s21, s3
1483-
; CHECK-NEXT: vmov q0, q4
1484-
; CHECK-NEXT: vmov.f32 s12, s6
1485-
; CHECK-NEXT: vmov.f32 s11, s13
1486-
; CHECK-NEXT: vmov.f32 s9, s5
1487-
; CHECK-NEXT: vmov.f32 s13, s7
1488-
; CHECK-NEXT: vmov q1, q5
1489-
; CHECK-NEXT: vpop {d8, d9, d10, d11}
1490-
; CHECK-NEXT: bx lr
1496+
; CHECK-LV-LABEL: shuffle9_f64:
1497+
; CHECK-LV: @ %bb.0: @ %entry
1498+
; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
1499+
; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
1500+
; CHECK-LV-NEXT: vmov q5, q2
1501+
; CHECK-LV-NEXT: vmov.f32 s16, s0
1502+
; CHECK-LV-NEXT: vmov.f32 s18, s20
1503+
; CHECK-LV-NEXT: vmov.f32 s20, s2
1504+
; CHECK-LV-NEXT: vmov.f32 s10, s12
1505+
; CHECK-LV-NEXT: vmov.f32 s19, s21
1506+
; CHECK-LV-NEXT: vmov.f32 s8, s4
1507+
; CHECK-LV-NEXT: vmov.f32 s17, s1
1508+
; CHECK-LV-NEXT: vmov.f32 s21, s3
1509+
; CHECK-LV-NEXT: vmov q0, q4
1510+
; CHECK-LV-NEXT: vmov.f32 s12, s6
1511+
; CHECK-LV-NEXT: vmov.f32 s11, s13
1512+
; CHECK-LV-NEXT: vmov.f32 s9, s5
1513+
; CHECK-LV-NEXT: vmov.f32 s13, s7
1514+
; CHECK-LV-NEXT: vmov q1, q5
1515+
; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
1516+
; CHECK-LV-NEXT: bx lr
1517+
1518+
; CHECK-LIS-LABEL: shuffle9_f64:
1519+
; CHECK-LIS: @ %bb.0: @ %entry
1520+
; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
1521+
; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
1522+
; CHECK-LIS-NEXT: vmov q5, q2
1523+
; CHECK-LIS-NEXT: vmov q4, q0
1524+
; CHECK-LIS-NEXT: vmov.f32 s2, s20
1525+
; CHECK-LIS-NEXT: vmov.f32 s20, s18
1526+
; CHECK-LIS-NEXT: vmov.f32 s10, s12
1527+
; CHECK-LIS-NEXT: vmov.f32 s3, s21
1528+
; CHECK-LIS-NEXT: vmov.f32 s8, s4
1529+
; CHECK-LIS-NEXT: vmov.f32 s21, s19
1530+
; CHECK-LIS-NEXT: vmov.f32 s12, s6
1531+
; CHECK-LIS-NEXT: vmov.f32 s11, s13
1532+
; CHECK-LIS-NEXT: vmov.f32 s9, s5
1533+
; CHECK-LIS-NEXT: vmov.f32 s13, s7
1534+
; CHECK-LIS-NEXT: vmov q1, q5
1535+
; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
1536+
; CHECK-LIS-NEXT: bx lr
14911537
entry:
14921538
%out = shufflevector <4 x double> %src1, <4 x double> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
14931539
ret <8 x double> %out
@@ -1560,27 +1606,47 @@ entry:
15601606
ret <2 x i64> %out
15611607
}
15621608
define arm_aapcs_vfpcc <8 x i64> @shuffle9_i64(<4 x i64> %src1, <4 x i64> %src2) {
1563-
; CHECK-LABEL: shuffle9_i64:
1564-
; CHECK: @ %bb.0: @ %entry
1565-
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1566-
; CHECK-NEXT: vpush {d8, d9, d10, d11}
1567-
; CHECK-NEXT: vmov q5, q2
1568-
; CHECK-NEXT: vmov.f32 s16, s0
1569-
; CHECK-NEXT: vmov.f32 s18, s20
1570-
; CHECK-NEXT: vmov.f32 s20, s2
1571-
; CHECK-NEXT: vmov.f32 s10, s12
1572-
; CHECK-NEXT: vmov.f32 s19, s21
1573-
; CHECK-NEXT: vmov.f32 s8, s4
1574-
; CHECK-NEXT: vmov.f32 s17, s1
1575-
; CHECK-NEXT: vmov.f32 s21, s3
1576-
; CHECK-NEXT: vmov q0, q4
1577-
; CHECK-NEXT: vmov.f32 s12, s6
1578-
; CHECK-NEXT: vmov.f32 s11, s13
1579-
; CHECK-NEXT: vmov.f32 s9, s5
1580-
; CHECK-NEXT: vmov.f32 s13, s7
1581-
; CHECK-NEXT: vmov q1, q5
1582-
; CHECK-NEXT: vpop {d8, d9, d10, d11}
1583-
; CHECK-NEXT: bx lr
1609+
; CHECK-LV-LABEL: shuffle9_i64:
1610+
; CHECK-LV: @ %bb.0: @ %entry
1611+
; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
1612+
; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
1613+
; CHECK-LV-NEXT: vmov q5, q2
1614+
; CHECK-LV-NEXT: vmov.f32 s16, s0
1615+
; CHECK-LV-NEXT: vmov.f32 s18, s20
1616+
; CHECK-LV-NEXT: vmov.f32 s20, s2
1617+
; CHECK-LV-NEXT: vmov.f32 s10, s12
1618+
; CHECK-LV-NEXT: vmov.f32 s19, s21
1619+
; CHECK-LV-NEXT: vmov.f32 s8, s4
1620+
; CHECK-LV-NEXT: vmov.f32 s17, s1
1621+
; CHECK-LV-NEXT: vmov.f32 s21, s3
1622+
; CHECK-LV-NEXT: vmov q0, q4
1623+
; CHECK-LV-NEXT: vmov.f32 s12, s6
1624+
; CHECK-LV-NEXT: vmov.f32 s11, s13
1625+
; CHECK-LV-NEXT: vmov.f32 s9, s5
1626+
; CHECK-LV-NEXT: vmov.f32 s13, s7
1627+
; CHECK-LV-NEXT: vmov q1, q5
1628+
; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
1629+
; CHECK-LV-NEXT: bx lr
1630+
1631+
; CHECK-LIS-LABEL: shuffle9_i64:
1632+
; CHECK-LIS: @ %bb.0: @ %entry
1633+
; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
1634+
; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
1635+
; CHECK-LIS-NEXT: vmov q5, q2
1636+
; CHECK-LIS-NEXT: vmov q4, q0
1637+
; CHECK-LIS-NEXT: vmov.f32 s2, s20
1638+
; CHECK-LIS-NEXT: vmov.f32 s20, s18
1639+
; CHECK-LIS-NEXT: vmov.f32 s10, s12
1640+
; CHECK-LIS-NEXT: vmov.f32 s3, s21
1641+
; CHECK-LIS-NEXT: vmov.f32 s8, s4
1642+
; CHECK-LIS-NEXT: vmov.f32 s21, s19
1643+
; CHECK-LIS-NEXT: vmov.f32 s12, s6
1644+
; CHECK-LIS-NEXT: vmov.f32 s11, s13
1645+
; CHECK-LIS-NEXT: vmov.f32 s9, s5
1646+
; CHECK-LIS-NEXT: vmov.f32 s13, s7
1647+
; CHECK-LIS-NEXT: vmov q1, q5
1648+
; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
1649+
; CHECK-LIS-NEXT: bx lr
15841650
entry:
15851651
%out = shufflevector <4 x i64> %src1, <4 x i64> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
15861652
ret <8 x i64> %out

0 commit comments

Comments
 (0)