Skip to content

Commit d48ebac

Browse files
[LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. (#88136)
The immediate forms of SQADD and SQSUB interpret their immediate operand as unsigned and thus effectively only support positive immediate operands. The original code is only wrong for the i8 cases because they previously accepted all values, however, the new patterns enable more uses and this I've added tests for the larger element types as well.
1 parent 9da0ef1 commit d48ebac

File tree

4 files changed

+191
-2
lines changed

4 files changed

+191
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,11 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
258258
return SelectSVEAddSubImm(N, VT, Imm, Shift);
259259
}
260260

261+
template <MVT::SimpleValueType VT, bool Negate>
262+
bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
263+
return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
264+
}
265+
261266
template <MVT::SimpleValueType VT>
262267
bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
263268
return SelectSVECpyDupImm(N, VT, Imm, Shift);
@@ -484,6 +489,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
484489
bool SelectCMP_SWAP(SDNode *N);
485490

486491
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
492+
bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
493+
bool Negate);
487494
bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
488495
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
489496

@@ -4014,6 +4021,56 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
40144021
return false;
40154022
}
40164023

4024+
bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4025+
SDValue &Imm, SDValue &Shift,
4026+
bool Negate) {
4027+
if (!isa<ConstantSDNode>(N))
4028+
return false;
4029+
4030+
SDLoc DL(N);
4031+
int64_t Val = cast<ConstantSDNode>(N)
4032+
->getAPIntValue()
4033+
.trunc(VT.getFixedSizeInBits())
4034+
.getSExtValue();
4035+
4036+
if (Negate)
4037+
Val = -Val;
4038+
4039+
// Signed saturating instructions treat their immediate operand as unsigned,
4040+
// whereas the related intrinsics define their operands to be signed. This
4041+
// means we can only use the immediate form when the operand is non-negative.
4042+
if (Val < 0)
4043+
return false;
4044+
4045+
switch (VT.SimpleTy) {
4046+
case MVT::i8:
4047+
// All positive immediates are supported.
4048+
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4049+
Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4050+
return true;
4051+
case MVT::i16:
4052+
case MVT::i32:
4053+
case MVT::i64:
4054+
// Support 8bit positive immediates.
4055+
if (Val <= 255) {
4056+
Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4057+
Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4058+
return true;
4059+
}
4060+
// Support 16bit positive immediates that are a multiple of 256.
4061+
if (Val <= 65280 && Val % 256 == 0) {
4062+
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4063+
Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4064+
return true;
4065+
}
4066+
break;
4067+
default:
4068+
break;
4069+
}
4070+
4071+
return false;
4072+
}
4073+
40174074
bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
40184075
SDValue &Shift) {
40194076
if (!isa<ConstantSDNode>(N))

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,9 +554,9 @@ let Predicates = [HasSVEorSME] in {
554554
defm ADD_ZI : sve_int_arith_imm0<0b000, "add", add>;
555555
defm SUB_ZI : sve_int_arith_imm0<0b001, "sub", sub>;
556556
defm SUBR_ZI : sve_int_arith_imm0<0b011, "subr", AArch64subr>;
557-
defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
557+
defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat, ssubsat>;
558558
defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
559-
defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
559+
defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat, saddsat>;
560560
defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
561561

562562
defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,16 @@ def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", [
249249
def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
250250
def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
251251

252+
def SVEAddSubSSatNegImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, true>", []>;
253+
def SVEAddSubSSatNegImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, true>", []>;
254+
def SVEAddSubSSatNegImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, true>", []>;
255+
def SVEAddSubSSatNegImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, true>", []>;
256+
257+
def SVEAddSubSSatPosImm8Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8, false>", []>;
258+
def SVEAddSubSSatPosImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16, false>", []>;
259+
def SVEAddSubSSatPosImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32, false>", []>;
260+
def SVEAddSubSSatPosImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64, false>", []>;
261+
252262
def SVECpyDupImm8Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
253263
def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
254264
def SVECpyDupImm32Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i32>", []>;
@@ -4775,6 +4785,24 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
47754785
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
47764786
}
47774787

4788+
multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op,
4789+
SDPatternOperator inv_op> {
4790+
def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8, addsub_imm8_opt_lsl_i8>;
4791+
def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
4792+
def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
4793+
def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
4794+
4795+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubSSatPosImm8Pat, !cast<Instruction>(NAME # _B)>;
4796+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatPosImm16Pat, !cast<Instruction>(NAME # _H)>;
4797+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatPosImm32Pat, !cast<Instruction>(NAME # _S)>;
4798+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatPosImm64Pat, !cast<Instruction>(NAME # _D)>;
4799+
4800+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, inv_op, ZPR8, i32, SVEAddSubSSatNegImm8Pat, !cast<Instruction>(NAME # _B)>;
4801+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubSSatNegImm16Pat, !cast<Instruction>(NAME # _H)>;
4802+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubSSatNegImm32Pat, !cast<Instruction>(NAME # _S)>;
4803+
def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubSSatNegImm64Pat, !cast<Instruction>(NAME # _D)>;
4804+
}
4805+
47784806
class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
47794807
ZPRRegOp zprty, Operand immtype>
47804808
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),

llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,19 @@ define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
10591059
ret <vscale x 16 x i8> %out
10601060
}
10611061

1062+
; Immediate instruction form only supports positive values.
1063+
define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) {
1064+
; CHECK-LABEL: sqadd_b_negimm:
1065+
; CHECK: // %bb.0:
1066+
; CHECK-NEXT: sqsub z0.b, z0.b, #128 // =0x80
1067+
; CHECK-NEXT: ret
1068+
%elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
1069+
%splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
1070+
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
1071+
<vscale x 16 x i8> %splat)
1072+
ret <vscale x 16 x i8> %out
1073+
}
1074+
10621075
define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
10631076
; CHECK-LABEL: sqadd_h_lowimm:
10641077
; CHECK: // %bb.0:
@@ -1083,6 +1096,19 @@ define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
10831096
ret <vscale x 8 x i16> %out
10841097
}
10851098

1099+
; Immediate instruction form only supports positive values.
1100+
define <vscale x 8 x i16> @sqadd_h_negimm(<vscale x 8 x i16> %a) {
1101+
; CHECK-LABEL: sqadd_h_negimm:
1102+
; CHECK: // %bb.0:
1103+
; CHECK-NEXT: sqsub z0.h, z0.h, #1 // =0x1
1104+
; CHECK-NEXT: ret
1105+
%elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
1106+
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
1107+
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
1108+
<vscale x 8 x i16> %splat)
1109+
ret <vscale x 8 x i16> %out
1110+
}
1111+
10861112
define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
10871113
; CHECK-LABEL: sqadd_s_lowimm:
10881114
; CHECK: // %bb.0:
@@ -1107,6 +1133,19 @@ define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
11071133
ret <vscale x 4 x i32> %out
11081134
}
11091135

1136+
; Immediate instruction form only supports positive values.
1137+
define <vscale x 4 x i32> @sqadd_s_negimm(<vscale x 4 x i32> %a) {
1138+
; CHECK-LABEL: sqadd_s_negimm:
1139+
; CHECK: // %bb.0:
1140+
; CHECK-NEXT: sqsub z0.s, z0.s, #65280 // =0xff00
1141+
; CHECK-NEXT: ret
1142+
%elt = insertelement <vscale x 4 x i32> undef, i32 -65280, i32 0
1143+
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
1144+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
1145+
<vscale x 4 x i32> %splat)
1146+
ret <vscale x 4 x i32> %out
1147+
}
1148+
11101149
define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
11111150
; CHECK-LABEL: sqadd_d_lowimm:
11121151
; CHECK: // %bb.0:
@@ -1131,6 +1170,19 @@ define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
11311170
ret <vscale x 2 x i64> %out
11321171
}
11331172

1173+
; Immediate instruction form only supports positive values.
1174+
define <vscale x 2 x i64> @sqadd_d_negimm(<vscale x 2 x i64> %a) {
1175+
; CHECK-LABEL: sqadd_d_negimm:
1176+
; CHECK: // %bb.0:
1177+
; CHECK-NEXT: sqsub z0.d, z0.d, #3840 // =0xf00
1178+
; CHECK-NEXT: ret
1179+
%elt = insertelement <vscale x 2 x i64> undef, i64 -3840, i32 0
1180+
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
1181+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
1182+
<vscale x 2 x i64> %splat)
1183+
ret <vscale x 2 x i64> %out
1184+
}
1185+
11341186
; SQSUB
11351187

11361188
define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
@@ -1145,6 +1197,19 @@ define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
11451197
ret <vscale x 16 x i8> %out
11461198
}
11471199

1200+
; Immediate instruction form only supports positive values.
1201+
define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) {
1202+
; CHECK-LABEL: sqsub_b_negimm:
1203+
; CHECK: // %bb.0:
1204+
; CHECK-NEXT: sqadd z0.b, z0.b, #1 // =0x1
1205+
; CHECK-NEXT: ret
1206+
%elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
1207+
%splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
1208+
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a,
1209+
<vscale x 16 x i8> %splat)
1210+
ret <vscale x 16 x i8> %out
1211+
}
1212+
11481213
define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
11491214
; CHECK-LABEL: sqsub_h_lowimm:
11501215
; CHECK: // %bb.0:
@@ -1169,6 +1234,19 @@ define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
11691234
ret <vscale x 8 x i16> %out
11701235
}
11711236

1237+
; Immediate instruction form only supports positive values.
1238+
define <vscale x 8 x i16> @sqsub_h_negimm(<vscale x 8 x i16> %a) {
1239+
; CHECK-LABEL: sqsub_h_negimm:
1240+
; CHECK: // %bb.0:
1241+
; CHECK-NEXT: sqadd z0.h, z0.h, #128 // =0x80
1242+
; CHECK-NEXT: ret
1243+
%elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
1244+
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
1245+
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
1246+
<vscale x 8 x i16> %splat)
1247+
ret <vscale x 8 x i16> %out
1248+
}
1249+
11721250
define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
11731251
; CHECK-LABEL: sqsub_s_lowimm:
11741252
; CHECK: // %bb.0:
@@ -1193,6 +1271,19 @@ define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
11931271
ret <vscale x 4 x i32> %out
11941272
}
11951273

1274+
; Immediate instruction form only supports positive values.
1275+
define <vscale x 4 x i32> @sqsub_s_negimm(<vscale x 4 x i32> %a) {
1276+
; CHECK-LABEL: sqsub_s_negimm:
1277+
; CHECK: // %bb.0:
1278+
; CHECK-NEXT: sqadd z0.s, z0.s, #32768 // =0x8000
1279+
; CHECK-NEXT: ret
1280+
%elt = insertelement <vscale x 4 x i32> undef, i32 -32768, i32 0
1281+
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
1282+
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
1283+
<vscale x 4 x i32> %splat)
1284+
ret <vscale x 4 x i32> %out
1285+
}
1286+
11961287
define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
11971288
; CHECK-LABEL: sqsub_d_lowimm:
11981289
; CHECK: // %bb.0:
@@ -1217,6 +1308,19 @@ define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
12171308
ret <vscale x 2 x i64> %out
12181309
}
12191310

1311+
; Immediate instruction form only supports positive values.
1312+
define <vscale x 2 x i64> @sqsub_d_negimm(<vscale x 2 x i64> %a) {
1313+
; CHECK-LABEL: sqsub_d_negimm:
1314+
; CHECK: // %bb.0:
1315+
; CHECK-NEXT: sqadd z0.d, z0.d, #57344 // =0xe000
1316+
; CHECK-NEXT: ret
1317+
%elt = insertelement <vscale x 2 x i64> undef, i64 -57344, i32 0
1318+
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
1319+
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
1320+
<vscale x 2 x i64> %splat)
1321+
ret <vscale x 2 x i64> %out
1322+
}
1323+
12201324
; UQADD
12211325

12221326
define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {

0 commit comments

Comments
 (0)