Skip to content

Commit 6a8d30b

Browse files
authored
DAG: Skip 0 sign handling in minimum/maximum lowering for _ieee case (#91326)
dc9664a changed the documentation so that the _ieee min/max variants are assumed to order -0 as less than +0.
1 parent ad652ef commit 6a8d30b

File tree

8 files changed

+1286
-6504
lines changed

8 files changed

+1286
-6504
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8401,8 +8401,14 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
84018401
SDValue MinMax;
84028402
unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
84038403
unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8404+
8405+
// FIXME: We should probably define fminnum/fmaxnum variants with correct
8406+
// signed zero behavior.
8407+
bool MinMaxMustRespectOrderedZero = false;
8408+
84048409
if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
84058410
MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS);
8411+
MinMaxMustRespectOrderedZero = true;
84068412
} else if (isOperationLegalOrCustom(CompOpc, VT)) {
84078413
MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS);
84088414
} else {
@@ -8422,8 +8428,8 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
84228428
}
84238429

84248430
// fminimum/fmaximum requires -0.0 less than +0.0
8425-
if (!N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(RHS) &&
8426-
!DAG.isKnownNeverZeroFloat(LHS)) {
8431+
if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8432+
!DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
84278433
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
84288434
DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
84298435
SDValue TestZero =

llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll

Lines changed: 625 additions & 2813 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll

Lines changed: 30 additions & 237 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll

Lines changed: 77 additions & 407 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll

Lines changed: 407 additions & 2193 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll

Lines changed: 30 additions & 237 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll

Lines changed: 77 additions & 407 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll

Lines changed: 32 additions & 208 deletions
Original file line numberDiff line numberDiff line change
@@ -45,74 +45,26 @@ define float @f32_minimum(float %a, float %b) {
4545
;
4646
; VSX-LABEL: f32_minimum:
4747
; VSX: # %bb.0: # %entry
48-
; VSX-NEXT: xscvdpspn 0, 1
4948
; VSX-NEXT: fcmpu 0, 1, 2
50-
; VSX-NEXT: xscvdpspn 3, 2
51-
; VSX-NEXT: mffprwz 3, 0
5249
; VSX-NEXT: bc 12, 3, .LBB0_2
5350
; VSX-NEXT: # %bb.1: # %entry
54-
; VSX-NEXT: xsmindp 0, 1, 2
55-
; VSX-NEXT: b .LBB0_3
51+
; VSX-NEXT: xsmindp 1, 1, 2
52+
; VSX-NEXT: blr
5653
; VSX-NEXT: .LBB0_2:
57-
; VSX-NEXT: addis 4, 2, .LCPI0_0@toc@ha
58-
; VSX-NEXT: lfs 0, .LCPI0_0@toc@l(4)
59-
; VSX-NEXT: .LBB0_3: # %entry
60-
; VSX-NEXT: xoris 3, 3, 32768
61-
; VSX-NEXT: mffprwz 4, 3
62-
; VSX-NEXT: cmplwi 3, 0
63-
; VSX-NEXT: bc 12, 2, .LBB0_5
64-
; VSX-NEXT: # %bb.4: # %entry
65-
; VSX-NEXT: fmr 1, 0
66-
; VSX-NEXT: .LBB0_5: # %entry
67-
; VSX-NEXT: xoris 3, 4, 32768
68-
; VSX-NEXT: cmplwi 3, 0
69-
; VSX-NEXT: bc 12, 2, .LBB0_7
70-
; VSX-NEXT: # %bb.6: # %entry
71-
; VSX-NEXT: fmr 2, 1
72-
; VSX-NEXT: .LBB0_7: # %entry
73-
; VSX-NEXT: xxlxor 1, 1, 1
74-
; VSX-NEXT: fcmpu 0, 0, 1
75-
; VSX-NEXT: bc 12, 2, .LBB0_9
76-
; VSX-NEXT: # %bb.8: # %entry
77-
; VSX-NEXT: fmr 2, 0
78-
; VSX-NEXT: .LBB0_9: # %entry
79-
; VSX-NEXT: fmr 1, 2
54+
; VSX-NEXT: addis 3, 2, .LCPI0_0@toc@ha
55+
; VSX-NEXT: lfs 1, .LCPI0_0@toc@l(3)
8056
; VSX-NEXT: blr
8157
;
8258
; AIX-LABEL: f32_minimum:
8359
; AIX: # %bb.0: # %entry
84-
; AIX-NEXT: xscvdpspn 0, 1
8560
; AIX-NEXT: fcmpu 0, 1, 2
86-
; AIX-NEXT: xscvdpspn 3, 2
87-
; AIX-NEXT: mffprwz 3, 0
8861
; AIX-NEXT: bc 12, 3, L..BB0_2
8962
; AIX-NEXT: # %bb.1: # %entry
90-
; AIX-NEXT: xsmindp 0, 1, 2
91-
; AIX-NEXT: b L..BB0_3
63+
; AIX-NEXT: xsmindp 1, 1, 2
64+
; AIX-NEXT: blr
9265
; AIX-NEXT: L..BB0_2:
93-
; AIX-NEXT: ld 4, L..C0(2) # %const.0
94-
; AIX-NEXT: lfs 0, 0(4)
95-
; AIX-NEXT: L..BB0_3: # %entry
96-
; AIX-NEXT: xoris 3, 3, 32768
97-
; AIX-NEXT: mffprwz 4, 3
98-
; AIX-NEXT: cmplwi 3, 0
99-
; AIX-NEXT: bc 12, 2, L..BB0_5
100-
; AIX-NEXT: # %bb.4: # %entry
101-
; AIX-NEXT: fmr 1, 0
102-
; AIX-NEXT: L..BB0_5: # %entry
103-
; AIX-NEXT: xoris 3, 4, 32768
104-
; AIX-NEXT: cmplwi 3, 0
105-
; AIX-NEXT: bc 12, 2, L..BB0_7
106-
; AIX-NEXT: # %bb.6: # %entry
107-
; AIX-NEXT: fmr 2, 1
108-
; AIX-NEXT: L..BB0_7: # %entry
109-
; AIX-NEXT: xxlxor 1, 1, 1
110-
; AIX-NEXT: fcmpu 0, 0, 1
111-
; AIX-NEXT: bc 12, 2, L..BB0_9
112-
; AIX-NEXT: # %bb.8: # %entry
113-
; AIX-NEXT: fmr 2, 0
114-
; AIX-NEXT: L..BB0_9: # %entry
115-
; AIX-NEXT: fmr 1, 2
66+
; AIX-NEXT: ld 3, L..C0(2) # %const.0
67+
; AIX-NEXT: lfs 1, 0(3)
11668
; AIX-NEXT: blr
11769
entry:
11870
%m = call float @llvm.minimum.f32(float %a, float %b)
@@ -159,70 +111,26 @@ define float @f32_maximum(float %a, float %b) {
159111
;
160112
; VSX-LABEL: f32_maximum:
161113
; VSX: # %bb.0: # %entry
162-
; VSX-NEXT: xscvdpspn 0, 1
163114
; VSX-NEXT: fcmpu 0, 1, 2
164-
; VSX-NEXT: xscvdpspn 3, 2
165-
; VSX-NEXT: mffprwz 3, 0
166115
; VSX-NEXT: bc 12, 3, .LBB1_2
167116
; VSX-NEXT: # %bb.1: # %entry
168-
; VSX-NEXT: xsmaxdp 0, 1, 2
169-
; VSX-NEXT: b .LBB1_3
117+
; VSX-NEXT: xsmaxdp 1, 1, 2
118+
; VSX-NEXT: blr
170119
; VSX-NEXT: .LBB1_2:
171-
; VSX-NEXT: addis 4, 2, .LCPI1_0@toc@ha
172-
; VSX-NEXT: lfs 0, .LCPI1_0@toc@l(4)
173-
; VSX-NEXT: .LBB1_3: # %entry
174-
; VSX-NEXT: mffprwz 4, 3
175-
; VSX-NEXT: cmpwi 3, 0
176-
; VSX-NEXT: bc 12, 2, .LBB1_5
177-
; VSX-NEXT: # %bb.4: # %entry
178-
; VSX-NEXT: fmr 1, 0
179-
; VSX-NEXT: .LBB1_5: # %entry
180-
; VSX-NEXT: cmpwi 4, 0
181-
; VSX-NEXT: bc 12, 2, .LBB1_7
182-
; VSX-NEXT: # %bb.6: # %entry
183-
; VSX-NEXT: fmr 2, 1
184-
; VSX-NEXT: .LBB1_7: # %entry
185-
; VSX-NEXT: xxlxor 1, 1, 1
186-
; VSX-NEXT: fcmpu 0, 0, 1
187-
; VSX-NEXT: bc 12, 2, .LBB1_9
188-
; VSX-NEXT: # %bb.8: # %entry
189-
; VSX-NEXT: fmr 2, 0
190-
; VSX-NEXT: .LBB1_9: # %entry
191-
; VSX-NEXT: fmr 1, 2
120+
; VSX-NEXT: addis 3, 2, .LCPI1_0@toc@ha
121+
; VSX-NEXT: lfs 1, .LCPI1_0@toc@l(3)
192122
; VSX-NEXT: blr
193123
;
194124
; AIX-LABEL: f32_maximum:
195125
; AIX: # %bb.0: # %entry
196-
; AIX-NEXT: xscvdpspn 0, 1
197126
; AIX-NEXT: fcmpu 0, 1, 2
198-
; AIX-NEXT: xscvdpspn 3, 2
199-
; AIX-NEXT: mffprwz 3, 0
200127
; AIX-NEXT: bc 12, 3, L..BB1_2
201128
; AIX-NEXT: # %bb.1: # %entry
202-
; AIX-NEXT: xsmaxdp 0, 1, 2
203-
; AIX-NEXT: b L..BB1_3
129+
; AIX-NEXT: xsmaxdp 1, 1, 2
130+
; AIX-NEXT: blr
204131
; AIX-NEXT: L..BB1_2:
205-
; AIX-NEXT: ld 4, L..C1(2) # %const.0
206-
; AIX-NEXT: lfs 0, 0(4)
207-
; AIX-NEXT: L..BB1_3: # %entry
208-
; AIX-NEXT: mffprwz 4, 3
209-
; AIX-NEXT: cmpwi 3, 0
210-
; AIX-NEXT: bc 12, 2, L..BB1_5
211-
; AIX-NEXT: # %bb.4: # %entry
212-
; AIX-NEXT: fmr 1, 0
213-
; AIX-NEXT: L..BB1_5: # %entry
214-
; AIX-NEXT: cmpwi 4, 0
215-
; AIX-NEXT: bc 12, 2, L..BB1_7
216-
; AIX-NEXT: # %bb.6: # %entry
217-
; AIX-NEXT: fmr 2, 1
218-
; AIX-NEXT: L..BB1_7: # %entry
219-
; AIX-NEXT: xxlxor 1, 1, 1
220-
; AIX-NEXT: fcmpu 0, 0, 1
221-
; AIX-NEXT: bc 12, 2, L..BB1_9
222-
; AIX-NEXT: # %bb.8: # %entry
223-
; AIX-NEXT: fmr 2, 0
224-
; AIX-NEXT: L..BB1_9: # %entry
225-
; AIX-NEXT: fmr 1, 2
132+
; AIX-NEXT: ld 3, L..C1(2) # %const.0
133+
; AIX-NEXT: lfs 1, 0(3)
226134
; AIX-NEXT: blr
227135
entry:
228136
%m = call float @llvm.maximum.f32(float %a, float %b)
@@ -272,69 +180,25 @@ define double @f64_minimum(double %a, double %b) {
272180
; VSX-LABEL: f64_minimum:
273181
; VSX: # %bb.0: # %entry
274182
; VSX-NEXT: fcmpu 0, 1, 2
275-
; VSX-NEXT: mffprd 3, 1
276183
; VSX-NEXT: bc 12, 3, .LBB2_2
277184
; VSX-NEXT: # %bb.1: # %entry
278-
; VSX-NEXT: xsmindp 0, 1, 2
279-
; VSX-NEXT: b .LBB2_3
185+
; VSX-NEXT: xsmindp 1, 1, 2
186+
; VSX-NEXT: blr
280187
; VSX-NEXT: .LBB2_2:
281-
; VSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha
282-
; VSX-NEXT: lfs 0, .LCPI2_0@toc@l(4)
283-
; VSX-NEXT: .LBB2_3: # %entry
284-
; VSX-NEXT: li 5, 1
285-
; VSX-NEXT: mffprd 4, 2
286-
; VSX-NEXT: rldic 5, 5, 63, 0
287-
; VSX-NEXT: cmpd 3, 5
288-
; VSX-NEXT: bc 12, 2, .LBB2_5
289-
; VSX-NEXT: # %bb.4: # %entry
290-
; VSX-NEXT: fmr 1, 0
291-
; VSX-NEXT: .LBB2_5: # %entry
292-
; VSX-NEXT: cmpd 4, 5
293-
; VSX-NEXT: bc 12, 2, .LBB2_7
294-
; VSX-NEXT: # %bb.6: # %entry
295-
; VSX-NEXT: fmr 2, 1
296-
; VSX-NEXT: .LBB2_7: # %entry
297-
; VSX-NEXT: xxlxor 1, 1, 1
298-
; VSX-NEXT: fcmpu 0, 0, 1
299-
; VSX-NEXT: bc 12, 2, .LBB2_9
300-
; VSX-NEXT: # %bb.8: # %entry
301-
; VSX-NEXT: fmr 2, 0
302-
; VSX-NEXT: .LBB2_9: # %entry
303-
; VSX-NEXT: fmr 1, 2
188+
; VSX-NEXT: addis 3, 2, .LCPI2_0@toc@ha
189+
; VSX-NEXT: lfs 1, .LCPI2_0@toc@l(3)
304190
; VSX-NEXT: blr
305191
;
306192
; AIX-LABEL: f64_minimum:
307193
; AIX: # %bb.0: # %entry
308194
; AIX-NEXT: fcmpu 0, 1, 2
309-
; AIX-NEXT: mffprd 3, 1
310195
; AIX-NEXT: bc 12, 3, L..BB2_2
311196
; AIX-NEXT: # %bb.1: # %entry
312-
; AIX-NEXT: xsmindp 0, 1, 2
313-
; AIX-NEXT: b L..BB2_3
197+
; AIX-NEXT: xsmindp 1, 1, 2
198+
; AIX-NEXT: blr
314199
; AIX-NEXT: L..BB2_2:
315-
; AIX-NEXT: ld 4, L..C2(2) # %const.0
316-
; AIX-NEXT: lfs 0, 0(4)
317-
; AIX-NEXT: L..BB2_3: # %entry
318-
; AIX-NEXT: li 5, 1
319-
; AIX-NEXT: mffprd 4, 2
320-
; AIX-NEXT: rldic 5, 5, 63, 0
321-
; AIX-NEXT: cmpd 3, 5
322-
; AIX-NEXT: bc 12, 2, L..BB2_5
323-
; AIX-NEXT: # %bb.4: # %entry
324-
; AIX-NEXT: fmr 1, 0
325-
; AIX-NEXT: L..BB2_5: # %entry
326-
; AIX-NEXT: cmpd 4, 5
327-
; AIX-NEXT: bc 12, 2, L..BB2_7
328-
; AIX-NEXT: # %bb.6: # %entry
329-
; AIX-NEXT: fmr 2, 1
330-
; AIX-NEXT: L..BB2_7: # %entry
331-
; AIX-NEXT: xxlxor 1, 1, 1
332-
; AIX-NEXT: fcmpu 0, 0, 1
333-
; AIX-NEXT: bc 12, 2, L..BB2_9
334-
; AIX-NEXT: # %bb.8: # %entry
335-
; AIX-NEXT: fmr 2, 0
336-
; AIX-NEXT: L..BB2_9: # %entry
337-
; AIX-NEXT: fmr 1, 2
200+
; AIX-NEXT: ld 3, L..C2(2) # %const.0
201+
; AIX-NEXT: lfs 1, 0(3)
338202
; AIX-NEXT: blr
339203
entry:
340204
%m = call double @llvm.minimum.f64(double %a, double %b)
@@ -382,65 +246,25 @@ define double @f64_maximum(double %a, double %b) {
382246
; VSX-LABEL: f64_maximum:
383247
; VSX: # %bb.0: # %entry
384248
; VSX-NEXT: fcmpu 0, 1, 2
385-
; VSX-NEXT: mffprd 3, 1
386249
; VSX-NEXT: bc 12, 3, .LBB3_2
387250
; VSX-NEXT: # %bb.1: # %entry
388-
; VSX-NEXT: xsmaxdp 0, 1, 2
389-
; VSX-NEXT: b .LBB3_3
251+
; VSX-NEXT: xsmaxdp 1, 1, 2
252+
; VSX-NEXT: blr
390253
; VSX-NEXT: .LBB3_2:
391-
; VSX-NEXT: addis 4, 2, .LCPI3_0@toc@ha
392-
; VSX-NEXT: lfs 0, .LCPI3_0@toc@l(4)
393-
; VSX-NEXT: .LBB3_3: # %entry
394-
; VSX-NEXT: mffprd 4, 2
395-
; VSX-NEXT: cmpdi 3, 0
396-
; VSX-NEXT: bc 12, 2, .LBB3_5
397-
; VSX-NEXT: # %bb.4: # %entry
398-
; VSX-NEXT: fmr 1, 0
399-
; VSX-NEXT: .LBB3_5: # %entry
400-
; VSX-NEXT: cmpdi 4, 0
401-
; VSX-NEXT: bc 12, 2, .LBB3_7
402-
; VSX-NEXT: # %bb.6: # %entry
403-
; VSX-NEXT: fmr 2, 1
404-
; VSX-NEXT: .LBB3_7: # %entry
405-
; VSX-NEXT: xxlxor 1, 1, 1
406-
; VSX-NEXT: fcmpu 0, 0, 1
407-
; VSX-NEXT: bc 12, 2, .LBB3_9
408-
; VSX-NEXT: # %bb.8: # %entry
409-
; VSX-NEXT: fmr 2, 0
410-
; VSX-NEXT: .LBB3_9: # %entry
411-
; VSX-NEXT: fmr 1, 2
254+
; VSX-NEXT: addis 3, 2, .LCPI3_0@toc@ha
255+
; VSX-NEXT: lfs 1, .LCPI3_0@toc@l(3)
412256
; VSX-NEXT: blr
413257
;
414258
; AIX-LABEL: f64_maximum:
415259
; AIX: # %bb.0: # %entry
416260
; AIX-NEXT: fcmpu 0, 1, 2
417-
; AIX-NEXT: mffprd 3, 1
418261
; AIX-NEXT: bc 12, 3, L..BB3_2
419262
; AIX-NEXT: # %bb.1: # %entry
420-
; AIX-NEXT: xsmaxdp 0, 1, 2
421-
; AIX-NEXT: b L..BB3_3
263+
; AIX-NEXT: xsmaxdp 1, 1, 2
264+
; AIX-NEXT: blr
422265
; AIX-NEXT: L..BB3_2:
423-
; AIX-NEXT: ld 4, L..C3(2) # %const.0
424-
; AIX-NEXT: lfs 0, 0(4)
425-
; AIX-NEXT: L..BB3_3: # %entry
426-
; AIX-NEXT: mffprd 4, 2
427-
; AIX-NEXT: cmpdi 3, 0
428-
; AIX-NEXT: bc 12, 2, L..BB3_5
429-
; AIX-NEXT: # %bb.4: # %entry
430-
; AIX-NEXT: fmr 1, 0
431-
; AIX-NEXT: L..BB3_5: # %entry
432-
; AIX-NEXT: cmpdi 4, 0
433-
; AIX-NEXT: bc 12, 2, L..BB3_7
434-
; AIX-NEXT: # %bb.6: # %entry
435-
; AIX-NEXT: fmr 2, 1
436-
; AIX-NEXT: L..BB3_7: # %entry
437-
; AIX-NEXT: xxlxor 1, 1, 1
438-
; AIX-NEXT: fcmpu 0, 0, 1
439-
; AIX-NEXT: bc 12, 2, L..BB3_9
440-
; AIX-NEXT: # %bb.8: # %entry
441-
; AIX-NEXT: fmr 2, 0
442-
; AIX-NEXT: L..BB3_9: # %entry
443-
; AIX-NEXT: fmr 1, 2
266+
; AIX-NEXT: ld 3, L..C3(2) # %const.0
267+
; AIX-NEXT: lfs 1, 0(3)
444268
; AIX-NEXT: blr
445269
entry:
446270
%m = call double @llvm.maximum.f64(double %a, double %b)

0 commit comments

Comments
 (0)