Skip to content

Commit 8cdbf8d

Browse files
committed
[SelectionDAG][AArch64][ARM] Remove setFlags call from DAGTypeLegalizer::SetPromotedInteger.
This was originally added to preserve FMF on SETCC. Unfortunately, it also incorrectly preserves nuw/nsw on ADD/SUB in some cases. There's also no guarantee the new opcode is even the same opcode as the original node.

This patch removes the code and adds code to explicitly preserve FMF flags in the SETCC promotion function.

The other test changes are from nuw/nsw not being preserved. I believe for all these tests it was correct to preserve the flags, so we need new code to preserve the flags when possible. I'll post another patch for that since it's a riskier change.

This should unblock D150769.

Differential Revision: https://reviews.llvm.org/D151472
1 parent 8f7b51e commit 8cdbf8d

File tree

5 files changed

+77
-114
lines changed

5 files changed

+77
-114
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1184,16 +1184,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
11841184
// Get the SETCC result using the canonical SETCC type.
11851185
SDValue SetCC;
11861186
if (N->isStrictFPOpcode()) {
1187-
EVT VTs[] = {SVT, MVT::Other};
1187+
SDVTList VTs = DAG.getVTList({SVT, MVT::Other});
11881188
SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
11891189
N->getOperand(2), N->getOperand(3)};
1190-
SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
1190+
SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers, N->getFlags());
11911191
// Legalize the chain result - switch anything that used the old chain to
11921192
// use the new one.
11931193
ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
11941194
} else
11951195
SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
1196-
N->getOperand(1), N->getOperand(2));
1196+
N->getOperand(1), N->getOperand(2), N->getFlags());
11971197

11981198
// Convert to the expected type.
11991199
return DAG.getSExtOrTrunc(SetCC, dl, NVT);

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -715,7 +715,6 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
715715
auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
716716
assert((OpIdEntry == 0) && "Node is already promoted!");
717717
OpIdEntry = getTableId(Result);
718-
Result->setFlags(Op->getFlags());
719718

720719
DAG.transferDbgValues(Op, Result);
721720
}

llvm/test/CodeGen/AArch64/arm64-vhadd.ll

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -873,8 +873,8 @@ define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
873873
; CHECK-NEXT: shl.2s v0, v0, #24
874874
; CHECK-NEXT: shl.2s v1, v1, #24
875875
; CHECK-NEXT: sshr.2s v0, v0, #24
876-
; CHECK-NEXT: sshr.2s v1, v1, #24
877-
; CHECK-NEXT: shadd.2s v0, v0, v1
876+
; CHECK-NEXT: ssra.2s v0, v1, #24
877+
; CHECK-NEXT: sshr.2s v0, v0, #1
878878
; CHECK-NEXT: ret
879879
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
880880
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -889,7 +889,8 @@ define <2 x i16> @hadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
889889
; CHECK-NEXT: movi d2, #0x0000ff000000ff
890890
; CHECK-NEXT: and.8b v0, v0, v2
891891
; CHECK-NEXT: and.8b v1, v1, v2
892-
; CHECK-NEXT: uhadd.2s v0, v0, v1
892+
; CHECK-NEXT: add.2s v0, v0, v1
893+
; CHECK-NEXT: ushr.2s v0, v0, #1
893894
; CHECK-NEXT: ret
894895
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
895896
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -922,7 +923,8 @@ define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
922923
; CHECK-NEXT: movi d2, #0x0000ff000000ff
923924
; CHECK-NEXT: and.8b v0, v0, v2
924925
; CHECK-NEXT: and.8b v1, v1, v2
925-
; CHECK-NEXT: uhadd.2s v0, v0, v1
926+
; CHECK-NEXT: add.2s v0, v0, v1
927+
; CHECK-NEXT: ushr.2s v0, v0, #1
926928
; CHECK-NEXT: ret
927929
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
928930
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1004,7 +1006,9 @@ define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
10041006
; CHECK-NEXT: shl.2s v1, v1, #24
10051007
; CHECK-NEXT: sshr.2s v0, v0, #24
10061008
; CHECK-NEXT: sshr.2s v1, v1, #24
1007-
; CHECK-NEXT: srhadd.2s v0, v0, v1
1009+
; CHECK-NEXT: mvn.8b v0, v0
1010+
; CHECK-NEXT: sub.2s v0, v1, v0
1011+
; CHECK-NEXT: sshr.2s v0, v0, #1
10081012
; CHECK-NEXT: ret
10091013
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
10101014
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16>
@@ -1020,7 +1024,9 @@ define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
10201024
; CHECK-NEXT: movi d2, #0x0000ff000000ff
10211025
; CHECK-NEXT: and.8b v0, v0, v2
10221026
; CHECK-NEXT: and.8b v1, v1, v2
1023-
; CHECK-NEXT: urhadd.2s v0, v0, v1
1027+
; CHECK-NEXT: mvn.8b v0, v0
1028+
; CHECK-NEXT: sub.2s v0, v1, v0
1029+
; CHECK-NEXT: ushr.2s v0, v0, #1
10241030
; CHECK-NEXT: ret
10251031
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
10261032
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>
@@ -1035,12 +1041,12 @@ define <2 x i16> @rhadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
10351041
; CHECK: // %bb.0:
10361042
; CHECK-NEXT: shl.2s v0, v0, #24
10371043
; CHECK-NEXT: shl.2s v1, v1, #24
1038-
; CHECK-NEXT: movi.2s v2, #1
1044+
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
10391045
; CHECK-NEXT: sshr.2s v0, v0, #24
1040-
; CHECK-NEXT: ssra.2s v0, v1, #24
1041-
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
1042-
; CHECK-NEXT: add.2s v0, v0, v2
1043-
; CHECK-NEXT: and.8b v0, v0, v1
1046+
; CHECK-NEXT: sshr.2s v1, v1, #24
1047+
; CHECK-NEXT: mvn.8b v0, v0
1048+
; CHECK-NEXT: sub.2s v0, v1, v0
1049+
; CHECK-NEXT: and.8b v0, v0, v2
10441050
; CHECK-NEXT: ushr.2s v0, v0, #1
10451051
; CHECK-NEXT: ret
10461052
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16>
@@ -1057,7 +1063,9 @@ define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
10571063
; CHECK-NEXT: movi d2, #0x0000ff000000ff
10581064
; CHECK-NEXT: and.8b v0, v0, v2
10591065
; CHECK-NEXT: and.8b v1, v1, v2
1060-
; CHECK-NEXT: urhadd.2s v0, v0, v1
1066+
; CHECK-NEXT: mvn.8b v0, v0
1067+
; CHECK-NEXT: sub.2s v0, v1, v0
1068+
; CHECK-NEXT: ushr.2s v0, v0, #1
10611069
; CHECK-NEXT: ret
10621070
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16>
10631071
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16>

llvm/test/CodeGen/AArch64/sve-hadd.ll

Lines changed: 48 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -219,22 +219,14 @@ entry:
219219
}
220220

221221
define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
222-
; SVE-LABEL: hadds_v2i16:
223-
; SVE: // %bb.0: // %entry
224-
; SVE-NEXT: ptrue p0.d
225-
; SVE-NEXT: sxth z0.d, p0/m, z0.d
226-
; SVE-NEXT: sxth z1.d, p0/m, z1.d
227-
; SVE-NEXT: add z0.d, z0.d, z1.d
228-
; SVE-NEXT: asr z0.d, z0.d, #1
229-
; SVE-NEXT: ret
230-
;
231-
; SVE2-LABEL: hadds_v2i16:
232-
; SVE2: // %bb.0: // %entry
233-
; SVE2-NEXT: ptrue p0.d
234-
; SVE2-NEXT: sxth z0.d, p0/m, z0.d
235-
; SVE2-NEXT: sxth z1.d, p0/m, z1.d
236-
; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d
237-
; SVE2-NEXT: ret
222+
; CHECK-LABEL: hadds_v2i16:
223+
; CHECK: // %bb.0: // %entry
224+
; CHECK-NEXT: ptrue p0.d
225+
; CHECK-NEXT: sxth z0.d, p0/m, z0.d
226+
; CHECK-NEXT: sxth z1.d, p0/m, z1.d
227+
; CHECK-NEXT: add z0.d, z0.d, z1.d
228+
; CHECK-NEXT: asr z0.d, z0.d, #1
229+
; CHECK-NEXT: ret
238230
entry:
239231
%s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
240232
%s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -264,21 +256,13 @@ entry:
264256
}
265257

266258
define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
267-
; SVE-LABEL: haddu_v2i16:
268-
; SVE: // %bb.0: // %entry
269-
; SVE-NEXT: and z0.d, z0.d, #0xffff
270-
; SVE-NEXT: and z1.d, z1.d, #0xffff
271-
; SVE-NEXT: add z0.d, z0.d, z1.d
272-
; SVE-NEXT: lsr z0.d, z0.d, #1
273-
; SVE-NEXT: ret
274-
;
275-
; SVE2-LABEL: haddu_v2i16:
276-
; SVE2: // %bb.0: // %entry
277-
; SVE2-NEXT: ptrue p0.d
278-
; SVE2-NEXT: and z0.d, z0.d, #0xffff
279-
; SVE2-NEXT: and z1.d, z1.d, #0xffff
280-
; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d
281-
; SVE2-NEXT: ret
259+
; CHECK-LABEL: haddu_v2i16:
260+
; CHECK: // %bb.0: // %entry
261+
; CHECK-NEXT: and z0.d, z0.d, #0xffff
262+
; CHECK-NEXT: and z1.d, z1.d, #0xffff
263+
; CHECK-NEXT: add z0.d, z0.d, z1.d
264+
; CHECK-NEXT: lsr z0.d, z0.d, #1
265+
; CHECK-NEXT: ret
282266
entry:
283267
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
284268
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -433,22 +417,14 @@ entry:
433417
}
434418

435419
define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
436-
; SVE-LABEL: hadds_v4i8:
437-
; SVE: // %bb.0: // %entry
438-
; SVE-NEXT: ptrue p0.s
439-
; SVE-NEXT: sxtb z0.s, p0/m, z0.s
440-
; SVE-NEXT: sxtb z1.s, p0/m, z1.s
441-
; SVE-NEXT: add z0.s, z0.s, z1.s
442-
; SVE-NEXT: asr z0.s, z0.s, #1
443-
; SVE-NEXT: ret
444-
;
445-
; SVE2-LABEL: hadds_v4i8:
446-
; SVE2: // %bb.0: // %entry
447-
; SVE2-NEXT: ptrue p0.s
448-
; SVE2-NEXT: sxtb z0.s, p0/m, z0.s
449-
; SVE2-NEXT: sxtb z1.s, p0/m, z1.s
450-
; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s
451-
; SVE2-NEXT: ret
420+
; CHECK-LABEL: hadds_v4i8:
421+
; CHECK: // %bb.0: // %entry
422+
; CHECK-NEXT: ptrue p0.s
423+
; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
424+
; CHECK-NEXT: sxtb z1.s, p0/m, z1.s
425+
; CHECK-NEXT: add z0.s, z0.s, z1.s
426+
; CHECK-NEXT: asr z0.s, z0.s, #1
427+
; CHECK-NEXT: ret
452428
entry:
453429
%s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
454430
%s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -478,21 +454,13 @@ entry:
478454
}
479455

480456
define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
481-
; SVE-LABEL: haddu_v4i8:
482-
; SVE: // %bb.0: // %entry
483-
; SVE-NEXT: and z0.s, z0.s, #0xff
484-
; SVE-NEXT: and z1.s, z1.s, #0xff
485-
; SVE-NEXT: add z0.s, z0.s, z1.s
486-
; SVE-NEXT: lsr z0.s, z0.s, #1
487-
; SVE-NEXT: ret
488-
;
489-
; SVE2-LABEL: haddu_v4i8:
490-
; SVE2: // %bb.0: // %entry
491-
; SVE2-NEXT: ptrue p0.s
492-
; SVE2-NEXT: and z0.s, z0.s, #0xff
493-
; SVE2-NEXT: and z1.s, z1.s, #0xff
494-
; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s
495-
; SVE2-NEXT: ret
457+
; CHECK-LABEL: haddu_v4i8:
458+
; CHECK: // %bb.0: // %entry
459+
; CHECK-NEXT: and z0.s, z0.s, #0xff
460+
; CHECK-NEXT: and z1.s, z1.s, #0xff
461+
; CHECK-NEXT: add z0.s, z0.s, z1.s
462+
; CHECK-NEXT: lsr z0.s, z0.s, #1
463+
; CHECK-NEXT: ret
496464
entry:
497465
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
498466
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>
@@ -916,23 +884,15 @@ entry:
916884
}
917885

918886
define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) {
919-
; SVE-LABEL: rhaddu_v2i16:
920-
; SVE: // %bb.0: // %entry
921-
; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
922-
; SVE-NEXT: and z0.d, z0.d, #0xffff
923-
; SVE-NEXT: and z1.d, z1.d, #0xffff
924-
; SVE-NEXT: eor z0.d, z0.d, z2.d
925-
; SVE-NEXT: sub z0.d, z1.d, z0.d
926-
; SVE-NEXT: lsr z0.d, z0.d, #1
927-
; SVE-NEXT: ret
928-
;
929-
; SVE2-LABEL: rhaddu_v2i16:
930-
; SVE2: // %bb.0: // %entry
931-
; SVE2-NEXT: ptrue p0.d
932-
; SVE2-NEXT: and z0.d, z0.d, #0xffff
933-
; SVE2-NEXT: and z1.d, z1.d, #0xffff
934-
; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d
935-
; SVE2-NEXT: ret
887+
; CHECK-LABEL: rhaddu_v2i16:
888+
; CHECK: // %bb.0: // %entry
889+
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
890+
; CHECK-NEXT: and z0.d, z0.d, #0xffff
891+
; CHECK-NEXT: and z1.d, z1.d, #0xffff
892+
; CHECK-NEXT: eor z0.d, z0.d, z2.d
893+
; CHECK-NEXT: sub z0.d, z1.d, z0.d
894+
; CHECK-NEXT: lsr z0.d, z0.d, #1
895+
; CHECK-NEXT: ret
936896
entry:
937897
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32>
938898
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32>
@@ -1135,23 +1095,15 @@ entry:
11351095
}
11361096

11371097
define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) {
1138-
; SVE-LABEL: rhaddu_v4i8:
1139-
; SVE: // %bb.0: // %entry
1140-
; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1141-
; SVE-NEXT: and z0.s, z0.s, #0xff
1142-
; SVE-NEXT: and z1.s, z1.s, #0xff
1143-
; SVE-NEXT: eor z0.d, z0.d, z2.d
1144-
; SVE-NEXT: sub z0.s, z1.s, z0.s
1145-
; SVE-NEXT: lsr z0.s, z0.s, #1
1146-
; SVE-NEXT: ret
1147-
;
1148-
; SVE2-LABEL: rhaddu_v4i8:
1149-
; SVE2: // %bb.0: // %entry
1150-
; SVE2-NEXT: ptrue p0.s
1151-
; SVE2-NEXT: and z0.s, z0.s, #0xff
1152-
; SVE2-NEXT: and z1.s, z1.s, #0xff
1153-
; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s
1154-
; SVE2-NEXT: ret
1098+
; CHECK-LABEL: rhaddu_v4i8:
1099+
; CHECK: // %bb.0: // %entry
1100+
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
1101+
; CHECK-NEXT: and z0.s, z0.s, #0xff
1102+
; CHECK-NEXT: and z1.s, z1.s, #0xff
1103+
; CHECK-NEXT: eor z0.d, z0.d, z2.d
1104+
; CHECK-NEXT: sub z0.s, z1.s, z0.s
1105+
; CHECK-NEXT: lsr z0.s, z0.s, #1
1106+
; CHECK-NEXT: ret
11551107
entry:
11561108
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16>
11571109
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16>

llvm/test/CodeGen/Thumb2/mve-vhadd.ll

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,8 @@ define arm_aapcs_vfpcc <4 x i8> @vhaddu_v4i8(<4 x i8> %s0, <4 x i8> %s1) {
116116
; CHECK-NEXT: vmov.i32 q2, #0xff
117117
; CHECK-NEXT: vand q1, q1, q2
118118
; CHECK-NEXT: vand q0, q0, q2
119-
; CHECK-NEXT: vhadd.u32 q0, q0, q1
119+
; CHECK-NEXT: vadd.i32 q0, q0, q1
120+
; CHECK-NEXT: vshr.u32 q0, q0, #1
120121
; CHECK-NEXT: bx lr
121122
entry:
122123
%s0s = zext <4 x i8> %s0 to <4 x i16>
@@ -312,9 +313,12 @@ define arm_aapcs_vfpcc <4 x i8> @vrhaddu_v4i8(<4 x i8> %s0, <4 x i8> %s1) {
312313
; CHECK-LABEL: vrhaddu_v4i8:
313314
; CHECK: @ %bb.0: @ %entry
314315
; CHECK-NEXT: vmov.i32 q2, #0xff
316+
; CHECK-NEXT: movs r0, #1
315317
; CHECK-NEXT: vand q1, q1, q2
316318
; CHECK-NEXT: vand q0, q0, q2
317-
; CHECK-NEXT: vrhadd.u32 q0, q0, q1
319+
; CHECK-NEXT: vadd.i32 q0, q0, q1
320+
; CHECK-NEXT: vadd.i32 q0, q0, r0
321+
; CHECK-NEXT: vshr.u32 q0, q0, #1
318322
; CHECK-NEXT: bx lr
319323
entry:
320324
%s0s = zext <4 x i8> %s0 to <4 x i16>

0 commit comments

Comments
 (0)