Skip to content

Commit 1d18930

Browse files
authored
[SystemZ] Don't use FP Load and Test as comparisons to same reg (#78074)
Using the FP Load and Test instructions as a comparison against zero, on the assumption that the destination register will always reflect the source register, is incorrect: an SNaN is converted to a QNaN, so the instruction may actually change the value rather than being a pure register move with a test. This patch: - Changes instruction selection to always emit an FP Load and Test with a scratch def register, which will typically be allocated to the same register if dead. - Removes the conversions into FP Load and Test in SystemZElimCompare.
1 parent 82e1e41 commit 1d18930

19 files changed

+168
-159
lines changed

llvm/lib/Target/SystemZ/SystemZElimCompare.cpp

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,6 @@ static bool preservesValueOf(MachineInstr &MI, unsigned Reg) {
115115
case SystemZ::LTR:
116116
case SystemZ::LTGR:
117117
case SystemZ::LTGFR:
118-
case SystemZ::LER:
119-
case SystemZ::LDR:
120-
case SystemZ::LXR:
121-
case SystemZ::LTEBR:
122-
case SystemZ::LTDBR:
123-
case SystemZ::LTXBR:
124118
if (MI.getOperand(1).getReg() == Reg)
125119
return true;
126120
}
@@ -498,18 +492,10 @@ bool SystemZElimCompare::adjustCCMasksForInstr(
498492

499493
// Return true if Compare is a comparison against zero.
500494
static bool isCompareZero(MachineInstr &Compare) {
501-
switch (Compare.getOpcode()) {
502-
case SystemZ::LTEBRCompare:
503-
case SystemZ::LTDBRCompare:
504-
case SystemZ::LTXBRCompare:
495+
if (isLoadAndTestAsCmp(Compare))
505496
return true;
506-
507-
default:
508-
if (isLoadAndTestAsCmp(Compare))
509-
return true;
510-
return Compare.getNumExplicitOperands() == 2 &&
511-
Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
512-
}
497+
return Compare.getNumExplicitOperands() == 2 &&
498+
Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
513499
}
514500

515501
// Try to optimize cases where comparison instruction Compare is testing
@@ -569,7 +555,7 @@ bool SystemZElimCompare::optimizeCompareZero(
569555

570556
// Also do a forward search to handle cases where an instruction after the
571557
// compare can be converted, like
572-
// LTEBRCompare %f0s, %f0s; %f2s = LER %f0s => LTEBRCompare %f2s, %f0s
558+
// CGHI %r0d, 0; %r1d = LGR %r0d => LTGR %r1d, %r0d
573559
auto MIRange = llvm::make_range(
574560
std::next(MachineBasicBlock::iterator(&Compare)), MBB.end());
575561
for (MachineInstr &MI : llvm::make_early_inc_range(MIRange)) {

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9437,11 +9437,11 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
94379437
return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
94389438
case SystemZ::TBEGINC:
94399439
return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9440-
case SystemZ::LTEBRCompare_VecPseudo:
9440+
case SystemZ::LTEBRCompare_Pseudo:
94419441
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9442-
case SystemZ::LTDBRCompare_VecPseudo:
9442+
case SystemZ::LTDBRCompare_Pseudo:
94439443
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9444-
case SystemZ::LTXBRCompare_VecPseudo:
9444+
case SystemZ::LTXBRCompare_Pseudo:
94459445
return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
94469446

94479447
case SystemZ::PROBED_ALLOCA:

llvm/lib/Target/SystemZ/SystemZInstrFP.td

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -51,36 +51,27 @@ let isCodeGenOnly = 1 in
5151
def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
5252

5353
// Moves between two floating-point registers that also set the condition
54-
// codes.
54+
// codes. Note that these instructions will turn SNaNs into QNaNs and should
55+
// not be used for comparison if the result will be used afterwards.
5556
let Uses = [FPC], mayRaiseFPException = 1,
5657
Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
57-
defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
58-
defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
59-
defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
60-
}
61-
// Note that LTxBRCompare is not available if we have vector support,
62-
// since load-and-test instructions will partially clobber the target
63-
// (vector) register.
64-
let Predicates = [FeatureNoVector] in {
65-
defm : CompareZeroFP<LTEBRCompare, FP32>;
66-
defm : CompareZeroFP<LTDBRCompare, FP64>;
67-
defm : CompareZeroFP<LTXBRCompare, FP128>;
58+
def LTEBR : UnaryRRE<"ltebr", 0xB302, null_frag, FP32, FP32>;
59+
def LTDBR : UnaryRRE<"ltdbr", 0xB312, null_frag, FP64, FP64>;
60+
def LTXBR : UnaryRRE<"ltxbr", 0xB342, null_frag, FP128, FP128>;
6861
}
6962

70-
// Use a normal load-and-test for compare against zero in case of
71-
// vector support (via a pseudo to simplify instruction selection).
63+
// Use a load-and-test for compare against zero (via a pseudo to simplify
64+
// instruction selection).
7265
let Uses = [FPC], mayRaiseFPException = 1,
7366
Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
74-
def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
75-
def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
76-
def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
77-
}
78-
let Predicates = [FeatureVector] in {
79-
defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>;
80-
defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>;
67+
def LTEBRCompare_Pseudo : Pseudo<(outs), (ins FP32:$R1), []>;
68+
def LTDBRCompare_Pseudo : Pseudo<(outs), (ins FP64:$R1), []>;
69+
def LTXBRCompare_Pseudo : Pseudo<(outs), (ins FP128:$R1), []>;
8170
}
82-
let Predicates = [FeatureVector, FeatureNoVectorEnhancements1] in
83-
defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
71+
defm : CompareZeroFP<LTEBRCompare_Pseudo, FP32>;
72+
defm : CompareZeroFP<LTDBRCompare_Pseudo, FP64>;
73+
let Predicates = [FeatureNoVectorEnhancements1] in
74+
defm : CompareZeroFP<LTXBRCompare_Pseudo, FP128>;
8475

8576
// Moves between 64-bit integer and floating-point registers.
8677
def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;

llvm/lib/Target/SystemZ/SystemZInstrFormats.td

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5036,18 +5036,6 @@ class BranchPreloadMII<string mnemonic, bits<8> opcode>
50365036
(ins imm32zx4:$M1, brtarget12bpp:$RI2, brtarget24bpp:$RI3),
50375037
mnemonic#"\t$M1, $RI2, $RI3", []>;
50385038

5039-
// A floating-point load-and test operation. Create both a normal unary
5040-
// operation and one that acts as a comparison against zero.
5041-
// Note that the comparison against zero operation is not available if we
5042-
// have vector support, since load-and-test instructions will partially
5043-
// clobber the target (vector) register.
5044-
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
5045-
RegisterOperand cls> {
5046-
def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
5047-
let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
5048-
def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
5049-
}
5050-
50515039
//===----------------------------------------------------------------------===//
50525040
// Pseudo instructions
50535041
//===----------------------------------------------------------------------===//

llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1692,9 +1692,6 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
16921692
case SystemZ::LR: return SystemZ::LTR;
16931693
case SystemZ::LGFR: return SystemZ::LTGFR;
16941694
case SystemZ::LGR: return SystemZ::LTGR;
1695-
case SystemZ::LER: return SystemZ::LTEBR;
1696-
case SystemZ::LDR: return SystemZ::LTDBR;
1697-
case SystemZ::LXR: return SystemZ::LTXBR;
16981695
case SystemZ::LCDFR: return SystemZ::LCDBR;
16991696
case SystemZ::LPDFR: return SystemZ::LPDBR;
17001697
case SystemZ::LNDFR: return SystemZ::LNDBR;

llvm/lib/Target/SystemZ/SystemZPatterns.td

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,11 @@ multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
145145
}
146146

147147
// Record that INSN is a LOAD AND TEST that can be used to compare
148-
// registers in CLS against zero. The instruction has separate R1 and R2
149-
// operands, but they must be the same when the instruction is used like this.
148+
// registers in CLS against zero.
150149
multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
151-
def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
150+
def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg)>;
152151
// The sign of the zero makes no difference.
153-
def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
152+
def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg)>;
154153
}
155154

156155
// Use INSN for performing binary operation OPERATION of type VT

llvm/lib/Target/SystemZ/SystemZScheduleZ13.td

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -784,9 +784,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
784784

785785
// Load and Test
786786
def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
787-
def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
788-
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
789-
(instregex "LTXBR(Compare)?$")>;
787+
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
790788

791789
// Copy sign
792790
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;

llvm/lib/Target/SystemZ/SystemZScheduleZ14.td

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -804,9 +804,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
804804

805805
// Load and Test
806806
def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
807-
def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
808-
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
809-
(instregex "LTXBR(Compare)?$")>;
807+
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
810808

811809
// Copy sign
812810
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;

llvm/lib/Target/SystemZ/SystemZScheduleZ15.td

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -821,9 +821,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
821821

822822
// Load and Test
823823
def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
824-
def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
825-
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
826-
(instregex "LTXBR(Compare)?$")>;
824+
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
827825

828826
// Copy sign
829827
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;

llvm/lib/Target/SystemZ/SystemZScheduleZ16.td

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -822,9 +822,7 @@ def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
822822

823823
// Load and Test
824824
def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
825-
def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
826-
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
827-
(instregex "LTXBR(Compare)?$")>;
825+
def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXBR$")>;
828826

829827
// Copy sign
830828
def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;

llvm/lib/Target/SystemZ/SystemZScheduleZ196.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -716,8 +716,7 @@ def : InstRW<[WLat2, FXU2, GroupAlone2], (instregex "LXR$")>;
716716

717717
// Load and Test
718718
def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
719-
def : InstRW<[WLat9, FPU, NormalGr], (instregex "LT(E|D)BRCompare$")>;
720-
def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR(Compare)?$")>;
719+
def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
721720

722721
// Copy sign
723722
def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;

llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -754,8 +754,7 @@ def : InstRW<[WLat2, FXU2, GroupAlone], (instregex "LXR$")>;
754754

755755
// Load and Test
756756
def : InstRW<[WLat9, WLat9, FPU, NormalGr], (instregex "LT(E|D)BR$")>;
757-
def : InstRW<[WLat9, FPU, NormalGr], (instregex "LT(E|D)BRCompare$")>;
758-
def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR(Compare)?$")>;
757+
def : InstRW<[WLat10, WLat10, FPU4, GroupAlone], (instregex "LTXBR$")>;
759758

760759
// Copy sign
761760
def : InstRW<[WLat5, FXU2, GroupAlone], (instregex "CPSDR(d|s)(d|s)$")>;

llvm/test/CodeGen/SystemZ/fp-cmp-04.ll

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ exit:
163163
define float @f9(float %a, float %b, ptr %dest) {
164164
; CHECK-LABEL: f9:
165165
; CHECK: meebr %f0, %f2
166-
; CHECK-NEXT: ltebr %f0, %f0
166+
; CHECK-NEXT: ltebr %f1, %f0
167167
; CHECK-NEXT: blhr %r14
168168
; CHECK: br %r14
169169
entry:
@@ -185,7 +185,7 @@ define float @f10(float %a, float %b, float %c, ptr %dest) {
185185
; CHECK-LABEL: f10:
186186
; CHECK: aebr %f0, %f2
187187
; CHECK-NEXT: debr %f0, %f4
188-
; CHECK-NEXT: ltebr %f0, %f0
188+
; CHECK-NEXT: ltebr %f1, %f0
189189
; CHECK-NEXT: bner %r14
190190
; CHECK: br %r14
191191
entry:
@@ -209,7 +209,7 @@ define float @f11(float %a, float %b, float %c, ptr %dest1, ptr %dest2) {
209209
; CHECK: aebr %f0, %f2
210210
; CHECK-NEXT: sebr %f4, %f0
211211
; CHECK-DAG: ste %f4, 0(%r2)
212-
; CHECK-DAG: ltebr %f0, %f0
212+
; CHECK-DAG: ltebr %f1, %f0
213213
; CHECK-NEXT: ber %r14
214214
; CHECK: br %r14
215215
entry:
@@ -227,10 +227,11 @@ exit:
227227
ret float %add
228228
}
229229

230-
; Test that LER gets converted to LTEBR where useful.
230+
; %val in %f2 must be preserved during comparison and also copied to %f0.
231231
define float @f12(float %dummy, float %val, ptr %dest) {
232232
; CHECK-LABEL: f12:
233-
; CHECK: ltebr %f0, %f2
233+
; CHECK: ler %f0, %f2
234+
; CHECK-NEXT: ltebr %f1, %f2
234235
; CHECK-NEXT: #APP
235236
; CHECK-NEXT: blah %f0
236237
; CHECK-NEXT: #NO_APP
@@ -249,10 +250,11 @@ exit:
249250
ret float %val
250251
}
251252

252-
; Test that LDR gets converted to LTDBR where useful.
253+
; Same for double.
253254
define double @f13(double %dummy, double %val, ptr %dest) {
254255
; CHECK-LABEL: f13:
255-
; CHECK: ltdbr %f0, %f2
256+
; CHECK: ldr %f0, %f2
257+
; CHECK-NEXT: ltdbr %f1, %f2
256258
; CHECK-NEXT: #APP
257259
; CHECK-NEXT: blah %f0
258260
; CHECK-NEXT: #NO_APP
@@ -271,14 +273,15 @@ exit:
271273
ret double %val
272274
}
273275

274-
; Test that LXR gets converted to LTXBR where useful.
276+
; LXR cannot be converted to LTXBR as its input is live after it.
275277
define void @f14(ptr %ptr1, ptr %ptr2) {
276278
; CHECK-LABEL: f14:
277-
; CHECK: ltxbr
279+
; CHECK: lxr
278280
; CHECK-NEXT: dxbr
279281
; CHECK-NEXT: std
280282
; CHECK-NEXT: std
281283
; CHECK-NEXT: mxbr
284+
; CHECK-NEXT: ltxbr
282285
; CHECK-NEXT: std
283286
; CHECK-NEXT: std
284287
; CHECK-NEXT: blr %r14
@@ -301,11 +304,10 @@ exit:
301304
ret void
302305
}
303306

304-
; Test a case where it is the source rather than destination of LER that
305-
; we need.
306307
define float @f15(float %val, float %dummy, ptr %dest) {
307308
; CHECK-LABEL: f15:
308-
; CHECK: ltebr %f2, %f0
309+
; CHECK: ltebr %f1, %f0
310+
; CHECK-NEXT: ler %f2, %f0
309311
; CHECK-NEXT: #APP
310312
; CHECK-NEXT: blah %f2
311313
; CHECK-NEXT: #NO_APP
@@ -324,11 +326,10 @@ exit:
324326
ret float %val
325327
}
326328

327-
; Test a case where it is the source rather than destination of LDR that
328-
; we need.
329329
define double @f16(double %val, double %dummy, ptr %dest) {
330330
; CHECK-LABEL: f16:
331-
; CHECK: ltdbr %f2, %f0
331+
; CHECK: ltdbr %f1, %f0
332+
; CHECK: ldr %f2, %f0
332333
; CHECK-NEXT: #APP
333334
; CHECK-NEXT: blah %f2
334335
; CHECK-NEXT: #NO_APP

llvm/test/CodeGen/SystemZ/fp-cmp-07.mir

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)