Skip to content

Commit 2d6a5ab

Browse files
committed
[X86]Recommit D154193 - Remove TEST in AND32ri+TEST16rr in peephole-opt
Previously we remove a pattern like: %reg = and32ri %in_reg, 5 ... // EFLAGS not changed. %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index test64rr %src_reg, %src_reg, implicit-def $eflags We can remove test64rr since it has same functionality as and subreg_to_reg avoid the opt in previous code, so we handle this case specially. And this case is also can be opted for the same reason, like: %reg = and32ri %in_reg, 5 ... // EFLAGS not changed. %src_reg = copy %reg.sub_16bit:gr32 test16rr %src_reg, %src_reg, implicit-def $eflags The COPY from gr32 to gr16 prevent the opt in previous code too, just handle it specially as what we did for test64rr. Reviewed By: skan Differential Revision: https://reviews.llvm.org/D154193
1 parent 0d81093 commit 2d6a5ab

File tree

2 files changed

+214
-45
lines changed

2 files changed

+214
-45
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -971,39 +971,57 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
971971
MachineInstr **AndInstr,
972972
const TargetRegisterInfo *TRI,
973973
bool &NoSignFlag, bool &ClearsOverflowFlag) {
974-
if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
974+
if (!(CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG &&
975+
CmpInstr.getOpcode() == X86::TEST64rr) &&
976+
!(CmpValDefInstr.getOpcode() == X86::COPY &&
977+
CmpInstr.getOpcode() == X86::TEST16rr))
975978
return false;
976979

977-
if (CmpInstr.getOpcode() != X86::TEST64rr)
978-
return false;
979-
980-
// CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
981-
// guarantees that it's analyzable only if two registers are identical.
982-
assert(
983-
(CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
984-
"CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
985-
"requires two reg operands are the same.");
980+
// CmpInstr is a TEST16rr/TEST64rr instruction, and
981+
// `X86InstrInfo::analyzeCompare` guarantees that it's analyzable only if two
982+
// registers are identical.
983+
assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
984+
"CmpInstr is an analyzable TEST16rr/TEST64rr, and "
985+
"`X86InstrInfo::analyzeCompare` requires two reg operands are the"
986+
"same.");
986987

987988
// Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
988989
// `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case
989990
// if `CmpValDefInstr` sets the EFLAGS, it is likely that `CmpInstr` is
990991
// redundant.
991992
assert(
992993
(MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
993-
"Caller guarantees that TEST64rr is a user of SUBREG_TO_REG.");
994+
"Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr "
995+
"is a user of COPY sub16bit.");
996+
MachineInstr *VregDefInstr = nullptr;
997+
if (CmpInstr.getOpcode() == X86::TEST16rr) {
998+
if (!CmpValDefInstr.getOperand(1).getReg().isVirtual())
999+
return false;
1000+
VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(1).getReg());
1001+
if (!VregDefInstr)
1002+
return false;
1003+
// We can only remove test when AND32ri or AND64ri32 whose imm can fit 16bit
1004+
// size, others 32/64 bit ops would test higher bits which test16rr don't
1005+
// want to.
1006+
if (!((VregDefInstr->getOpcode() == X86::AND32ri ||
1007+
VregDefInstr->getOpcode() == X86::AND64ri32) &&
1008+
isUInt<16>(VregDefInstr->getOperand(2).getImm())))
1009+
return false;
1010+
}
9941011

995-
// As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is typically
996-
// 0.
997-
if (CmpValDefInstr.getOperand(1).getImm() != 0)
998-
return false;
1012+
if (CmpInstr.getOpcode() == X86::TEST64rr) {
1013+
// As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is
1014+
// typically 0.
1015+
if (CmpValDefInstr.getOperand(1).getImm() != 0)
1016+
return false;
9991017

1000-
// As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
1001-
// sub_32bit or sub_xmm.
1002-
if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
1003-
return false;
1018+
// As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
1019+
// sub_32bit or sub_xmm.
1020+
if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
1021+
return false;
10041022

1005-
MachineInstr *VregDefInstr =
1006-
MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
1023+
VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
1024+
}
10071025

10081026
assert(VregDefInstr && "Must have a definition (SSA)");
10091027

@@ -1021,6 +1039,11 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
10211039
// ... // EFLAGS not changed
10221040
// %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
10231041
// test64rr %extended_reg, %extended_reg, implicit-def $eflags
1042+
// or
1043+
// %reg = and32* ...
1044+
// ... // EFLAGS not changed.
1045+
// %src_reg = copy %reg.sub_16bit:gr32
1046+
// test16rr %src_reg, %src_reg, implicit-def $eflags
10241047
//
10251048
// If subsequent readers use a subset of bits that don't change
10261049
// after `and*` instructions, it's likely that the test64rr could
@@ -4421,10 +4444,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
44214444
break;
44224445
}
44234446

4424-
// Look back for the following pattern, in which case the test64rr
4425-
// instruction could be erased.
4447+
// Look back for the following pattern, in which case the
4448+
// test16rr/test64rr instruction could be erased.
44264449
//
4427-
// Example:
4450+
// Example for test16rr:
4451+
// %reg = and32ri %in_reg, 5
4452+
// ... // EFLAGS not changed.
4453+
// %src_reg = copy %reg.sub_16bit:gr32
4454+
// test16rr %src_reg, %src_reg, implicit-def $eflags
4455+
// Example for test64rr:
44284456
// %reg = and32ri %in_reg, 5
44294457
// ... // EFLAGS not changed.
44304458
// %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index

llvm/test/CodeGen/X86/peephole-test-after-add.mir

Lines changed: 162 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,29 @@
5858
entry:
5959
%3 = icmp ne i16 %0, 0
6060
%4 = and i32 %1, 123456
61-
%truc = trunc i32 %4 to i16
62-
%5 = icmp eq i16 %truc, 0
61+
%trunc = trunc i32 %4 to i16
62+
%5 = icmp eq i16 %trunc, 0
6363
%6 = select i1 %3, i1 %5, i1 false
6464
br i1 %6, label %if.then, label %if.end
6565

6666
if.then: ; preds = %entry
67-
store i16 %0, ptr %2, align 4
67+
store i32 %4, ptr %2, align 4
68+
br label %if.end
69+
70+
if.end: ; preds = %if.then, %entry
71+
ret i16 0
72+
}
73+
74+
define i16 @erase_test16_sf(i16 %0, i16 %1, ptr nocapture %2) {
75+
entry:
76+
%3 = icmp ne i16 %0, 0
77+
%4 = and i16 %1, 1234
78+
%5 = icmp slt i16 %4, 0
79+
%6 = select i1 %3, i1 %5, i1 false
80+
br i1 %6, label %if.then, label %if.end
81+
82+
if.then: ; preds = %entry
83+
store i16 %4, ptr %2, align 4
6884
br label %if.end
6985

7086
if.end: ; preds = %if.then, %entry
@@ -303,9 +319,8 @@ body: |
303319
; CHECK-NEXT: bb.1.entry:
304320
; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
305321
; CHECK-NEXT: {{ $}}
306-
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123, implicit-def dead $eflags
322+
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123, implicit-def $eflags
307323
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
308-
; CHECK-NEXT: TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
309324
; CHECK-NEXT: JCC_1 %bb.3, 5, implicit $eflags
310325
; CHECK-NEXT: JMP_1 %bb.2
311326
; CHECK-NEXT: {{ $}}
@@ -374,16 +389,16 @@ tracksDebugUserValues: false
374389
registers:
375390
- { id: 0, class: gr32, preferred-register: '' }
376391
- { id: 1, class: gr32, preferred-register: '' }
377-
- { id: 2, class: gr64, preferred-register: '' }
378-
- { id: 3, class: gr16, preferred-register: '' }
392+
- { id: 2, class: gr32, preferred-register: '' }
393+
- { id: 3, class: gr64, preferred-register: '' }
379394
- { id: 4, class: gr16, preferred-register: '' }
380-
- { id: 5, class: gr32, preferred-register: '' }
395+
- { id: 5, class: gr16, preferred-register: '' }
381396
- { id: 6, class: gr32, preferred-register: '' }
382397
- { id: 7, class: gr16, preferred-register: '' }
383398
liveins:
384-
- { reg: '$edi', virtual-reg: '%0' }
385-
- { reg: '$esi', virtual-reg: '%1' }
386-
- { reg: '$rdx', virtual-reg: '%2' }
399+
- { reg: '$edi', virtual-reg: '%1' }
400+
- { reg: '$esi', virtual-reg: '%2' }
401+
- { reg: '$rdx', virtual-reg: '%3' }
387402
frameInfo:
388403
isFrameAddressTaken: false
389404
isReturnAddressTaken: false
@@ -429,7 +444,7 @@ body: |
429444
; CHECK-NEXT: bb.1.entry:
430445
; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
431446
; CHECK-NEXT: {{ $}}
432-
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 57920, implicit-def dead $eflags
447+
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123456, implicit-def dead $eflags
433448
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
434449
; CHECK-NEXT: TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
435450
; CHECK-NEXT: JCC_1 %bb.3, 5, implicit $eflags
@@ -438,7 +453,7 @@ body: |
438453
; CHECK-NEXT: bb.2.if.then:
439454
; CHECK-NEXT: successors: %bb.3(0x80000000)
440455
; CHECK-NEXT: {{ $}}
441-
; CHECK-NEXT: MOV16mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s16) into %ir.2, align 4)
456+
; CHECK-NEXT: MOV32mr [[COPY]], 1, $noreg, 0, $noreg, [[AND32ri]] :: (store (s32) into %ir.2)
442457
; CHECK-NEXT: {{ $}}
443458
; CHECK-NEXT: bb.3.if.end:
444459
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
@@ -449,27 +464,153 @@ body: |
449464
successors: %bb.3(0x60000000), %bb.2(0x20000000)
450465
liveins: $edi, $esi, $rdx
451466
452-
%2:gr64 = COPY $rdx
453-
%1:gr32 = COPY $esi
454-
%0:gr32 = COPY $edi
455-
%3:gr16 = COPY %0.sub_16bit
456-
TEST16rr %3, %3, implicit-def $eflags
467+
%3:gr64 = COPY $rdx
468+
%2:gr32 = COPY $esi
469+
%1:gr32 = COPY $edi
470+
%5:gr16 = COPY %1.sub_16bit
471+
TEST16rr %5, %5, implicit-def $eflags
457472
JCC_1 %bb.2, 4, implicit $eflags
458473
JMP_1 %bb.3
459474
460475
bb.3.entry:
461476
successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
462477
463-
%5:gr32 = AND32ri %1, 57920, implicit-def dead $eflags
464-
%4:gr16 = COPY %5.sub_16bit
478+
%0:gr32 = AND32ri %2, 123456, implicit-def dead $eflags
479+
%4:gr16 = COPY %0.sub_16bit
465480
TEST16rr %4, %4, implicit-def $eflags
466481
JCC_1 %bb.2, 5, implicit $eflags
467482
JMP_1 %bb.1
468483
469484
bb.1.if.then:
470485
successors: %bb.2(0x80000000)
471486
472-
MOV16mr %2, 1, $noreg, 0, $noreg, %3 :: (store (s16) into %ir.2, align 4)
487+
MOV32mr %3, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.2)
488+
489+
bb.2.if.end:
490+
%6:gr32 = MOV32r0 implicit-def dead $eflags
491+
%7:gr16 = COPY %6.sub_16bit
492+
$ax = COPY %7
493+
RET 0, $ax
494+
495+
...
496+
---
497+
name: erase_test16_sf
498+
alignment: 16
499+
exposesReturnsTwice: false
500+
legalized: false
501+
regBankSelected: false
502+
selected: false
503+
failedISel: false
504+
tracksRegLiveness: true
505+
hasWinCFI: false
506+
callsEHReturn: false
507+
callsUnwindInit: false
508+
hasEHCatchret: false
509+
hasEHScopes: false
510+
hasEHFunclets: false
511+
isOutlined: false
512+
debugInstrRef: true
513+
failsVerification: false
514+
tracksDebugUserValues: false
515+
registers:
516+
- { id: 0, class: gr16, preferred-register: '' }
517+
- { id: 1, class: gr32, preferred-register: '' }
518+
- { id: 2, class: gr32, preferred-register: '' }
519+
- { id: 3, class: gr64, preferred-register: '' }
520+
- { id: 4, class: gr16, preferred-register: '' }
521+
- { id: 5, class: gr32, preferred-register: '' }
522+
- { id: 6, class: gr32, preferred-register: '' }
523+
- { id: 7, class: gr16, preferred-register: '' }
524+
liveins:
525+
- { reg: '$edi', virtual-reg: '%1' }
526+
- { reg: '$esi', virtual-reg: '%2' }
527+
- { reg: '$rdx', virtual-reg: '%3' }
528+
frameInfo:
529+
isFrameAddressTaken: false
530+
isReturnAddressTaken: false
531+
hasStackMap: false
532+
hasPatchPoint: false
533+
stackSize: 0
534+
offsetAdjustment: 0
535+
maxAlignment: 1
536+
adjustsStack: false
537+
hasCalls: false
538+
stackProtector: ''
539+
functionContext: ''
540+
maxCallFrameSize: 4294967295
541+
cvBytesOfCalleeSavedRegisters: 0
542+
hasOpaqueSPAdjustment: false
543+
hasVAStart: false
544+
hasMustTailInVarArgFunc: false
545+
hasTailCall: false
546+
localFrameSize: 0
547+
savePoint: ''
548+
restorePoint: ''
549+
fixedStack: []
550+
stack: []
551+
entry_values: []
552+
callSites: []
553+
debugValueSubstitutions: []
554+
constants: []
555+
machineFunctionInfo: {}
556+
body: |
557+
; CHECK-LABEL: name: erase_test16_sf
558+
; CHECK: bb.0.entry:
559+
; CHECK-NEXT: successors: %bb.1(0x60000000), %bb.3(0x20000000)
560+
; CHECK-NEXT: liveins: $edi, $esi, $rdx
561+
; CHECK-NEXT: {{ $}}
562+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdx
563+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
564+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi
565+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit
566+
; CHECK-NEXT: TEST16rr [[COPY3]], [[COPY3]], implicit-def $eflags
567+
; CHECK-NEXT: JCC_1 %bb.3, 4, implicit $eflags
568+
; CHECK-NEXT: JMP_1 %bb.1
569+
; CHECK-NEXT: {{ $}}
570+
; CHECK-NEXT: bb.1.entry:
571+
; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab)
572+
; CHECK-NEXT: {{ $}}
573+
; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 1234, implicit-def dead $eflags
574+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit
575+
; CHECK-NEXT: TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags
576+
; CHECK-NEXT: JCC_1 %bb.3, 9, implicit $eflags
577+
; CHECK-NEXT: JMP_1 %bb.2
578+
; CHECK-NEXT: {{ $}}
579+
; CHECK-NEXT: bb.2.if.then:
580+
; CHECK-NEXT: successors: %bb.3(0x80000000)
581+
; CHECK-NEXT: {{ $}}
582+
; CHECK-NEXT: MOV16mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY4]] :: (store (s16) into %ir.2, align 4)
583+
; CHECK-NEXT: {{ $}}
584+
; CHECK-NEXT: bb.3.if.end:
585+
; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags
586+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY [[MOV32r0_]].sub_16bit
587+
; CHECK-NEXT: $ax = COPY [[COPY5]]
588+
; CHECK-NEXT: RET 0, $ax
589+
bb.0.entry:
590+
successors: %bb.3(0x60000000), %bb.2(0x20000000)
591+
liveins: $edi, $esi, $rdx
592+
593+
%3:gr64 = COPY $rdx
594+
%2:gr32 = COPY $esi
595+
%1:gr32 = COPY $edi
596+
%4:gr16 = COPY %1.sub_16bit
597+
TEST16rr %4, %4, implicit-def $eflags
598+
JCC_1 %bb.2, 4, implicit $eflags
599+
JMP_1 %bb.3
600+
601+
bb.3.entry:
602+
successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab)
603+
604+
%5:gr32 = AND32ri %2, 1234, implicit-def dead $eflags
605+
%0:gr16 = COPY %5.sub_16bit
606+
TEST16rr %0, %0, implicit-def $eflags
607+
JCC_1 %bb.2, 9, implicit $eflags
608+
JMP_1 %bb.1
609+
610+
bb.1.if.then:
611+
successors: %bb.2(0x80000000)
612+
613+
MOV16mr %3, 1, $noreg, 0, $noreg, %0 :: (store (s16) into %ir.2, align 4)
473614
474615
bb.2.if.end:
475616
%6:gr32 = MOV32r0 implicit-def dead $eflags

0 commit comments

Comments
 (0)