@@ -161,6 +161,9 @@ static bool isUnscaledLdSt(unsigned Opc) {
161
161
case AArch64::LDURXi:
162
162
case AArch64::LDURSWi:
163
163
case AArch64::LDURHHi:
164
+ case AArch64::LDURBBi:
165
+ case AArch64::LDURSBWi:
166
+ case AArch64::LDURSHWi:
164
167
return true ;
165
168
}
166
169
}
@@ -169,16 +172,39 @@ static bool isUnscaledLdSt(MachineInstr *MI) {
169
172
return isUnscaledLdSt (MI->getOpcode ());
170
173
}
171
174
175
+ static unsigned getBitExtrOpcode (MachineInstr *MI) {
176
+ switch (MI->getOpcode ()) {
177
+ default :
178
+ llvm_unreachable (" Unexpected opcode." );
179
+ case AArch64::LDRBBui:
180
+ case AArch64::LDURBBi:
181
+ case AArch64::LDRHHui:
182
+ case AArch64::LDURHHi:
183
+ return AArch64::UBFMWri;
184
+ case AArch64::LDRSBWui:
185
+ case AArch64::LDURSBWi:
186
+ case AArch64::LDRSHWui:
187
+ case AArch64::LDURSHWi:
188
+ return AArch64::SBFMWri;
189
+ }
190
+ }
191
+
172
192
static bool isSmallTypeLdMerge (unsigned Opc) {
173
193
switch (Opc) {
174
194
default :
175
195
return false ;
176
196
case AArch64::LDRHHui:
177
197
case AArch64::LDURHHi:
198
+ case AArch64::LDRBBui:
199
+ case AArch64::LDURBBi:
200
+ case AArch64::LDRSHWui:
201
+ case AArch64::LDURSHWi:
202
+ case AArch64::LDRSBWui:
203
+ case AArch64::LDURSBWi:
178
204
return true ;
179
- // FIXME: Add other instructions (e.g, LDRBBui, LDURSHWi, LDRSHWui, etc.).
180
205
}
181
206
}
207
+
182
208
/// Convenience overload: test the opcode of \p MI.
static bool isSmallTypeLdMerge(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  return isSmallTypeLdMerge(Opc);
}
@@ -189,10 +215,15 @@ static int getMemScale(MachineInstr *MI) {
189
215
default :
190
216
llvm_unreachable (" Opcode has unknown scale!" );
191
217
case AArch64::LDRBBui:
218
+ case AArch64::LDURBBi:
219
+ case AArch64::LDRSBWui:
220
+ case AArch64::LDURSBWi:
192
221
case AArch64::STRBBui:
193
222
return 1 ;
194
223
case AArch64::LDRHHui:
195
224
case AArch64::LDURHHi:
225
+ case AArch64::LDRSHWui:
226
+ case AArch64::LDURSHWi:
196
227
case AArch64::STRHHui:
197
228
return 2 ;
198
229
case AArch64::LDRSui:
@@ -265,11 +296,21 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
265
296
case AArch64::LDURSi:
266
297
case AArch64::LDRHHui:
267
298
case AArch64::LDURHHi:
299
+ case AArch64::LDRBBui:
300
+ case AArch64::LDURBBi:
268
301
return Opc;
269
302
case AArch64::LDRSWui:
270
303
return AArch64::LDRWui;
271
304
case AArch64::LDURSWi:
272
305
return AArch64::LDURWi;
306
+ case AArch64::LDRSBWui:
307
+ return AArch64::LDRBBui;
308
+ case AArch64::LDRSHWui:
309
+ return AArch64::LDRHHui;
310
+ case AArch64::LDURSBWi:
311
+ return AArch64::LDURBBi;
312
+ case AArch64::LDURSHWi:
313
+ return AArch64::LDURHHi;
273
314
}
274
315
}
275
316
@@ -311,9 +352,17 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
311
352
case AArch64::LDURSWi:
312
353
return AArch64::LDPSWi;
313
354
case AArch64::LDRHHui:
355
+ case AArch64::LDRSHWui:
314
356
return AArch64::LDRWui;
315
357
case AArch64::LDURHHi:
358
+ case AArch64::LDURSHWi:
316
359
return AArch64::LDURWi;
360
+ case AArch64::LDRBBui:
361
+ case AArch64::LDRSBWui:
362
+ return AArch64::LDRHHui;
363
+ case AArch64::LDURBBi:
364
+ case AArch64::LDURSBWi:
365
+ return AArch64::LDURHHi;
317
366
}
318
367
}
319
368
@@ -535,16 +584,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
535
584
536
585
if (isSmallTypeLdMerge (Opc)) {
537
586
// Change the scaled offset from small to large type.
538
- if (!IsUnscaled)
587
+ if (!IsUnscaled) {
588
+ assert (((OffsetImm & 1 ) == 0 ) && " Unexpected offset to merge" );
539
589
OffsetImm /= 2 ;
590
+ }
540
591
MachineInstr *RtNewDest = MergeForward ? I : Paired;
541
592
// When merging small (< 32 bit) loads for big-endian targets, the order of
542
593
// the component parts gets swapped.
543
594
if (!Subtarget->isLittleEndian ())
544
595
std::swap (RtMI, Rt2MI);
545
596
// Construct the new load instruction.
546
- // FIXME: currently we support only halfword unsigned load. We need to
547
- // handle byte type, signed, and store instructions as well.
548
597
MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
549
598
NewMemMI = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
550
599
TII->get (NewOpc))
@@ -564,35 +613,61 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
564
613
DEBUG (dbgs () << " with instructions:\n " );
565
614
DEBUG ((NewMemMI)->print (dbgs ()));
566
615
616
+ int Width = getMemScale (I) == 1 ? 8 : 16 ;
617
+ int LSBLow = 0 ;
618
+ int LSBHigh = Width;
619
+ int ImmsLow = LSBLow + Width - 1 ;
620
+ int ImmsHigh = LSBHigh + Width - 1 ;
567
621
MachineInstr *ExtDestMI = MergeForward ? Paired : I;
568
622
if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian ()) {
569
- // Create the bitfield extract for high half .
623
+ // Create the bitfield extract for high bits .
570
624
BitExtMI1 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
571
- TII->get (AArch64::UBFMWri ))
625
+ TII->get (getBitExtrOpcode (Rt2MI) ))
572
626
.addOperand (getLdStRegOp (Rt2MI))
573
627
.addReg (getLdStRegOp (RtNewDest).getReg ())
574
- .addImm (16 )
575
- .addImm (31 );
576
- // Create the bitfield extract for low half.
577
- BitExtMI2 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
578
- TII->get (AArch64::ANDWri))
579
- .addOperand (getLdStRegOp (RtMI))
580
- .addReg (getLdStRegOp (RtNewDest).getReg ())
581
- .addImm (15 );
628
+ .addImm (LSBHigh)
629
+ .addImm (ImmsHigh);
630
+ // Create the bitfield extract for low bits.
631
+ if (RtMI->getOpcode () == getMatchingNonSExtOpcode (RtMI->getOpcode ())) {
632
+ // For unsigned, prefer to use AND for low bits.
633
+ BitExtMI2 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
634
+ TII->get (AArch64::ANDWri))
635
+ .addOperand (getLdStRegOp (RtMI))
636
+ .addReg (getLdStRegOp (RtNewDest).getReg ())
637
+ .addImm (ImmsLow);
638
+ } else {
639
+ BitExtMI2 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
640
+ TII->get (getBitExtrOpcode (RtMI)))
641
+ .addOperand (getLdStRegOp (RtMI))
642
+ .addReg (getLdStRegOp (RtNewDest).getReg ())
643
+ .addImm (LSBLow)
644
+ .addImm (ImmsLow);
645
+ }
582
646
} else {
583
- // Create the bitfield extract for low half.
584
- BitExtMI1 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
585
- TII->get (AArch64::ANDWri))
586
- .addOperand (getLdStRegOp (RtMI))
587
- .addReg (getLdStRegOp (RtNewDest).getReg ())
588
- .addImm (15 );
589
- // Create the bitfield extract for high half.
647
+ // Create the bitfield extract for low bits.
648
+ if (RtMI->getOpcode () == getMatchingNonSExtOpcode (RtMI->getOpcode ())) {
649
+ // For unsigned, prefer to use AND for low bits.
650
+ BitExtMI1 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
651
+ TII->get (AArch64::ANDWri))
652
+ .addOperand (getLdStRegOp (RtMI))
653
+ .addReg (getLdStRegOp (RtNewDest).getReg ())
654
+ .addImm (ImmsLow);
655
+ } else {
656
+ BitExtMI1 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
657
+ TII->get (getBitExtrOpcode (RtMI)))
658
+ .addOperand (getLdStRegOp (RtMI))
659
+ .addReg (getLdStRegOp (RtNewDest).getReg ())
660
+ .addImm (LSBLow)
661
+ .addImm (ImmsLow);
662
+ }
663
+
664
+ // Create the bitfield extract for high bits.
590
665
BitExtMI2 = BuildMI (*I->getParent (), InsertionPoint, I->getDebugLoc (),
591
- TII->get (AArch64::UBFMWri ))
666
+ TII->get (getBitExtrOpcode (Rt2MI) ))
592
667
.addOperand (getLdStRegOp (Rt2MI))
593
668
.addReg (getLdStRegOp (RtNewDest).getReg ())
594
- .addImm (16 )
595
- .addImm (31 );
669
+ .addImm (LSBHigh )
670
+ .addImm (ImmsHigh );
596
671
}
597
672
DEBUG (dbgs () << " " );
598
673
DEBUG ((BitExtMI1)->print (dbgs ()));
@@ -1173,7 +1248,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
1173
1248
bool enableNarrowLdOpt) {
1174
1249
bool Modified = false ;
1175
1250
// Three tranformations to do here:
1176
- // 1) Find halfword loads that can be merged into a single 32-bit word load
1251
+ // 1) Find narrow loads that can be converted into a single wider load
1177
1252
// with bitfield extract instructions.
1178
1253
// e.g.,
1179
1254
// ldrh w0, [x2]
@@ -1206,9 +1281,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
1206
1281
++MBBI;
1207
1282
break ;
1208
1283
// Scaled instructions.
1284
+ case AArch64::LDRBBui:
1209
1285
case AArch64::LDRHHui:
1286
+ case AArch64::LDRSBWui:
1287
+ case AArch64::LDRSHWui:
1210
1288
// Unscaled instructions.
1211
- case AArch64::LDURHHi: {
1289
+ case AArch64::LDURBBi:
1290
+ case AArch64::LDURHHi:
1291
+ case AArch64::LDURSBWi:
1292
+ case AArch64::LDURSHWi: {
1212
1293
if (tryToMergeLdStInst (MBBI)) {
1213
1294
Modified = true ;
1214
1295
break ;
0 commit comments