Skip to content

Commit eeb240a

Browse files
committed
Merged two loops that were iterating over the same machine basic block. Also made some minor readability improvements (such as adding comments and hoisting an if condition into a variable), and adjusted the tests to match the new behavior.
1 parent 0e3ba99 commit eeb240a

File tree

3 files changed

+965
-486
lines changed

3 files changed

+965
-486
lines changed

llvm/lib/CodeGen/SlotIndexes.cpp

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
212212
IndexList::iterator ListI = endIdx.listEntry()->getIterator();
213213
MachineBasicBlock::iterator MBBI = End;
214214
bool pastStart = false;
215+
bool oldIndexesRemoved = false;
215216
while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) {
216217
assert(ListI->getIndex() >= startIdx.getIndex() &&
217218
(includeStart || !pastStart) &&
@@ -220,32 +221,39 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
220221
MachineInstr *SlotMI = ListI->getInstr();
221222
MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? &*MBBI : nullptr;
222223
bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
224+
bool MIIndexNotFound = MI && mi2iMap.find(MI) == mi2iMap.end();
225+
bool SlotMIRemoved = false;
223226

224227
if (SlotMI == MI && !MBBIAtBegin) {
225228
--ListI;
226229
if (MBBI != Begin)
227230
--MBBI;
228231
else
229232
pastStart = true;
230-
} else if (MI && !mi2iMap.contains(MI)) {
233+
} else if (MIIndexNotFound || oldIndexesRemoved) {
231234
if (MBBI != Begin)
232235
--MBBI;
233236
else
234237
pastStart = true;
235238
} else {
236-
--ListI;
237-
if (SlotMI)
239+
// We ran through all the indexes on the interval
240+
// -> The only thing left is to go through all the
241+
// remaining MBB instructions and update their indexes
242+
if (ListI == ListB)
243+
oldIndexesRemoved = true;
244+
else
245+
--ListI;
246+
if (SlotMI) {
238247
removeMachineInstrFromMaps(*SlotMI);
248+
SlotMIRemoved = true;
249+
}
239250
}
240-
}
241251

242-
// In theory this could be combined with the previous loop, but it is tricky
243-
// to update the IndexList while we are iterating it.
244-
for (MachineBasicBlock::iterator I = End; I != Begin;) {
245-
--I;
246-
MachineInstr &MI = *I;
247-
if (!MI.isDebugOrPseudoInstr() && !mi2iMap.contains(&MI))
248-
insertMachineInstrInMaps(MI);
252+
MachineInstr *instrToInsert = SlotMIRemoved ? SlotMI : MI;
253+
254+
// Insert instruction back into the maps after passing it/removing the index
255+
if ((MIIndexNotFound || SlotMIRemoved) && instrToInsert->getParent() != nullptr && !instrToInsert->isDebugOrPseudoInstr())
256+
insertMachineInstrInMaps(*instrToInsert);
249257
}
250258
}
251259

@@ -287,4 +295,4 @@ LLVM_DUMP_METHOD void SlotIndex::dump() const {
287295
print(dbgs());
288296
dbgs() << "\n";
289297
}
290-
#endif
298+
#endif

llvm/test/CodeGen/Thumb2/mve-shuffle.ll

Lines changed: 133 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -223,18 +223,31 @@ entry:
223223
}
224224

225225
define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
226-
; CHECK-LABEL: shuffle3_i16:
227-
; CHECK: @ %bb.0: @ %entry
228-
; CHECK-NEXT: vmovx.f16 s5, s3
229-
; CHECK-NEXT: vmovx.f16 s6, s1
230-
; CHECK-NEXT: vmovx.f16 s4, s0
231-
; CHECK-NEXT: vins.f16 s1, s0
232-
; CHECK-NEXT: vins.f16 s6, s4
233-
; CHECK-NEXT: vins.f16 s5, s3
234-
; CHECK-NEXT: vmov.f32 s4, s2
235-
; CHECK-NEXT: vmov.f32 s7, s1
236-
; CHECK-NEXT: vmov q0, q1
237-
; CHECK-NEXT: bx lr
226+
; CHECK-LV-LABEL: shuffle3_i16:
227+
; CHECK-LV: @ %bb.0: @ %entry
228+
; CHECK-LV-NEXT: vmovx.f16 s5, s3
229+
; CHECK-LV-NEXT: vmovx.f16 s6, s1
230+
; CHECK-LV-NEXT: vmovx.f16 s4, s0
231+
; CHECK-LV-NEXT: vins.f16 s1, s0
232+
; CHECK-LV-NEXT: vins.f16 s6, s4
233+
; CHECK-LV-NEXT: vins.f16 s5, s3
234+
; CHECK-LV-NEXT: vmov.f32 s4, s2
235+
; CHECK-LV-NEXT: vmov.f32 s7, s1
236+
; CHECK-LV-NEXT: vmov q0, q1
237+
; CHECK-LV-NEXT: bx lr
238+
239+
; CHECK-LIS-LABEL: shuffle3_i16:
240+
; CHECK-LIS: @ %bb.0: @ %entry
241+
; CHECK-LIS-NEXT: vmov q1, q0
242+
; CHECK-LIS-NEXT: vmovx.f16 s2, s5
243+
; CHECK-LIS-NEXT: vmovx.f16 s0, s4
244+
; CHECK-LIS-NEXT: vins.f16 s5, s4
245+
; CHECK-LIS-NEXT: vins.f16 s2, s0
246+
; CHECK-LIS-NEXT: vmov.f32 s0, s6
247+
; CHECK-LIS-NEXT: vmovx.f16 s1, s7
248+
; CHECK-LIS-NEXT: vmov.f32 s3, s5
249+
; CHECK-LIS-NEXT: vins.f16 s1, s7
250+
; CHECK-LIS-NEXT: bx lr
238251
entry:
239252
%out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
240253
ret <8 x i16> %out
@@ -1145,18 +1158,31 @@ entry:
11451158
}
11461159

11471160
define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
1148-
; CHECK-LABEL: shuffle3_f16:
1149-
; CHECK: @ %bb.0: @ %entry
1150-
; CHECK-NEXT: vmovx.f16 s5, s3
1151-
; CHECK-NEXT: vmovx.f16 s6, s1
1152-
; CHECK-NEXT: vmovx.f16 s4, s0
1153-
; CHECK-NEXT: vins.f16 s1, s0
1154-
; CHECK-NEXT: vins.f16 s6, s4
1155-
; CHECK-NEXT: vins.f16 s5, s3
1156-
; CHECK-NEXT: vmov.f32 s4, s2
1157-
; CHECK-NEXT: vmov.f32 s7, s1
1158-
; CHECK-NEXT: vmov q0, q1
1159-
; CHECK-NEXT: bx lr
1161+
; CHECK-LV-LABEL: shuffle3_f16:
1162+
; CHECK-LV: @ %bb.0: @ %entry
1163+
; CHECK-LV-NEXT: vmovx.f16 s5, s3
1164+
; CHECK-LV-NEXT: vmovx.f16 s6, s1
1165+
; CHECK-LV-NEXT: vmovx.f16 s4, s0
1166+
; CHECK-LV-NEXT: vins.f16 s1, s0
1167+
; CHECK-LV-NEXT: vins.f16 s6, s4
1168+
; CHECK-LV-NEXT: vins.f16 s5, s3
1169+
; CHECK-LV-NEXT: vmov.f32 s4, s2
1170+
; CHECK-LV-NEXT: vmov.f32 s7, s1
1171+
; CHECK-LV-NEXT: vmov q0, q1
1172+
; CHECK-LV-NEXT: bx lr
1173+
1174+
; CHECK-LIS-LABEL: shuffle3_f16:
1175+
; CHECK-LIS: @ %bb.0: @ %entry
1176+
; CHECK-LIS-NEXT: vmov q1, q0
1177+
; CHECK-LIS-NEXT: vmovx.f16 s2, s5
1178+
; CHECK-LIS-NEXT: vmovx.f16 s0, s4
1179+
; CHECK-LIS-NEXT: vins.f16 s5, s4
1180+
; CHECK-LIS-NEXT: vins.f16 s2, s0
1181+
; CHECK-LIS-NEXT: vmov.f32 s0, s6
1182+
; CHECK-LIS-NEXT: vmovx.f16 s1, s7
1183+
; CHECK-LIS-NEXT: vmov.f32 s3, s5
1184+
; CHECK-LIS-NEXT: vins.f16 s1, s7
1185+
; CHECK-LIS-NEXT: bx lr
11601186
entry:
11611187
%out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
11621188
ret <8 x half> %out
@@ -1467,27 +1493,47 @@ entry:
14671493
ret <2 x double> %out
14681494
}
14691495
define arm_aapcs_vfpcc <8 x double> @shuffle9_f64(<4 x double> %src1, <4 x double> %src2) {
1470-
; CHECK-LABEL: shuffle9_f64:
1471-
; CHECK: @ %bb.0: @ %entry
1472-
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1473-
; CHECK-NEXT: vpush {d8, d9, d10, d11}
1474-
; CHECK-NEXT: vmov q5, q2
1475-
; CHECK-NEXT: vmov.f32 s16, s0
1476-
; CHECK-NEXT: vmov.f32 s18, s20
1477-
; CHECK-NEXT: vmov.f32 s20, s2
1478-
; CHECK-NEXT: vmov.f32 s10, s12
1479-
; CHECK-NEXT: vmov.f32 s19, s21
1480-
; CHECK-NEXT: vmov.f32 s8, s4
1481-
; CHECK-NEXT: vmov.f32 s17, s1
1482-
; CHECK-NEXT: vmov.f32 s21, s3
1483-
; CHECK-NEXT: vmov q0, q4
1484-
; CHECK-NEXT: vmov.f32 s12, s6
1485-
; CHECK-NEXT: vmov.f32 s11, s13
1486-
; CHECK-NEXT: vmov.f32 s9, s5
1487-
; CHECK-NEXT: vmov.f32 s13, s7
1488-
; CHECK-NEXT: vmov q1, q5
1489-
; CHECK-NEXT: vpop {d8, d9, d10, d11}
1490-
; CHECK-NEXT: bx lr
1496+
; CHECK-LV-LABEL: shuffle9_f64:
1497+
; CHECK-LV: @ %bb.0: @ %entry
1498+
; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
1499+
; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
1500+
; CHECK-LV-NEXT: vmov q5, q2
1501+
; CHECK-LV-NEXT: vmov.f32 s16, s0
1502+
; CHECK-LV-NEXT: vmov.f32 s18, s20
1503+
; CHECK-LV-NEXT: vmov.f32 s20, s2
1504+
; CHECK-LV-NEXT: vmov.f32 s10, s12
1505+
; CHECK-LV-NEXT: vmov.f32 s19, s21
1506+
; CHECK-LV-NEXT: vmov.f32 s8, s4
1507+
; CHECK-LV-NEXT: vmov.f32 s17, s1
1508+
; CHECK-LV-NEXT: vmov.f32 s21, s3
1509+
; CHECK-LV-NEXT: vmov q0, q4
1510+
; CHECK-LV-NEXT: vmov.f32 s12, s6
1511+
; CHECK-LV-NEXT: vmov.f32 s11, s13
1512+
; CHECK-LV-NEXT: vmov.f32 s9, s5
1513+
; CHECK-LV-NEXT: vmov.f32 s13, s7
1514+
; CHECK-LV-NEXT: vmov q1, q5
1515+
; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
1516+
; CHECK-LV-NEXT: bx lr
1517+
1518+
; CHECK-LIS-LABEL: shuffle9_f64:
1519+
; CHECK-LIS: @ %bb.0: @ %entry
1520+
; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
1521+
; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
1522+
; CHECK-LIS-NEXT: vmov q5, q2
1523+
; CHECK-LIS-NEXT: vmov q4, q0
1524+
; CHECK-LIS-NEXT: vmov.f32 s2, s20
1525+
; CHECK-LIS-NEXT: vmov.f32 s20, s18
1526+
; CHECK-LIS-NEXT: vmov.f32 s10, s12
1527+
; CHECK-LIS-NEXT: vmov.f32 s3, s21
1528+
; CHECK-LIS-NEXT: vmov.f32 s8, s4
1529+
; CHECK-LIS-NEXT: vmov.f32 s21, s19
1530+
; CHECK-LIS-NEXT: vmov.f32 s12, s6
1531+
; CHECK-LIS-NEXT: vmov.f32 s11, s13
1532+
; CHECK-LIS-NEXT: vmov.f32 s9, s5
1533+
; CHECK-LIS-NEXT: vmov.f32 s13, s7
1534+
; CHECK-LIS-NEXT: vmov q1, q5
1535+
; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
1536+
; CHECK-LIS-NEXT: bx lr
14911537
entry:
14921538
%out = shufflevector <4 x double> %src1, <4 x double> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
14931539
ret <8 x double> %out
@@ -1560,27 +1606,47 @@ entry:
15601606
ret <2 x i64> %out
15611607
}
15621608
define arm_aapcs_vfpcc <8 x i64> @shuffle9_i64(<4 x i64> %src1, <4 x i64> %src2) {
1563-
; CHECK-LABEL: shuffle9_i64:
1564-
; CHECK: @ %bb.0: @ %entry
1565-
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1566-
; CHECK-NEXT: vpush {d8, d9, d10, d11}
1567-
; CHECK-NEXT: vmov q5, q2
1568-
; CHECK-NEXT: vmov.f32 s16, s0
1569-
; CHECK-NEXT: vmov.f32 s18, s20
1570-
; CHECK-NEXT: vmov.f32 s20, s2
1571-
; CHECK-NEXT: vmov.f32 s10, s12
1572-
; CHECK-NEXT: vmov.f32 s19, s21
1573-
; CHECK-NEXT: vmov.f32 s8, s4
1574-
; CHECK-NEXT: vmov.f32 s17, s1
1575-
; CHECK-NEXT: vmov.f32 s21, s3
1576-
; CHECK-NEXT: vmov q0, q4
1577-
; CHECK-NEXT: vmov.f32 s12, s6
1578-
; CHECK-NEXT: vmov.f32 s11, s13
1579-
; CHECK-NEXT: vmov.f32 s9, s5
1580-
; CHECK-NEXT: vmov.f32 s13, s7
1581-
; CHECK-NEXT: vmov q1, q5
1582-
; CHECK-NEXT: vpop {d8, d9, d10, d11}
1583-
; CHECK-NEXT: bx lr
1609+
; CHECK-LV-LABEL: shuffle9_i64:
1610+
; CHECK-LV: @ %bb.0: @ %entry
1611+
; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
1612+
; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
1613+
; CHECK-LV-NEXT: vmov q5, q2
1614+
; CHECK-LV-NEXT: vmov.f32 s16, s0
1615+
; CHECK-LV-NEXT: vmov.f32 s18, s20
1616+
; CHECK-LV-NEXT: vmov.f32 s20, s2
1617+
; CHECK-LV-NEXT: vmov.f32 s10, s12
1618+
; CHECK-LV-NEXT: vmov.f32 s19, s21
1619+
; CHECK-LV-NEXT: vmov.f32 s8, s4
1620+
; CHECK-LV-NEXT: vmov.f32 s17, s1
1621+
; CHECK-LV-NEXT: vmov.f32 s21, s3
1622+
; CHECK-LV-NEXT: vmov q0, q4
1623+
; CHECK-LV-NEXT: vmov.f32 s12, s6
1624+
; CHECK-LV-NEXT: vmov.f32 s11, s13
1625+
; CHECK-LV-NEXT: vmov.f32 s9, s5
1626+
; CHECK-LV-NEXT: vmov.f32 s13, s7
1627+
; CHECK-LV-NEXT: vmov q1, q5
1628+
; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
1629+
; CHECK-LV-NEXT: bx lr
1630+
1631+
; CHECK-LIS-LABEL: shuffle9_i64:
1632+
; CHECK-LIS: @ %bb.0: @ %entry
1633+
; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
1634+
; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
1635+
; CHECK-LIS-NEXT: vmov q5, q2
1636+
; CHECK-LIS-NEXT: vmov q4, q0
1637+
; CHECK-LIS-NEXT: vmov.f32 s2, s20
1638+
; CHECK-LIS-NEXT: vmov.f32 s20, s18
1639+
; CHECK-LIS-NEXT: vmov.f32 s10, s12
1640+
; CHECK-LIS-NEXT: vmov.f32 s3, s21
1641+
; CHECK-LIS-NEXT: vmov.f32 s8, s4
1642+
; CHECK-LIS-NEXT: vmov.f32 s21, s19
1643+
; CHECK-LIS-NEXT: vmov.f32 s12, s6
1644+
; CHECK-LIS-NEXT: vmov.f32 s11, s13
1645+
; CHECK-LIS-NEXT: vmov.f32 s9, s5
1646+
; CHECK-LIS-NEXT: vmov.f32 s13, s7
1647+
; CHECK-LIS-NEXT: vmov q1, q5
1648+
; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
1649+
; CHECK-LIS-NEXT: bx lr
15841650
entry:
15851651
%out = shufflevector <4 x i64> %src1, <4 x i64> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
15861652
ret <8 x i64> %out
@@ -1822,4 +1888,4 @@ entry:
18221888

18231889
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
18241890
; CHECK-LIS: {{.*}}
1825-
; CHECK-LV: {{.*}}
1891+
; CHECK-LV: {{.*}}

0 commit comments

Comments
 (0)