Skip to content

Commit 2179867

Browse files
committed
[AArch64] Select saturating Neon instructions
This adds some extra patterns to select AArch64 Neon SQADD, UQADD, SQSUB and UQSUB from the existing target-independent sadd_sat, uadd_sat, ssub_sat and usub_sat nodes. It does not attempt to replace the existing int_aarch64_neon_uqadd intrinsic nodes as they are apparently used for both scalar and vector, and need to be legal on scalar types for some of the patterns to work. The int_aarch64_neon_uqadd on scalar would move the two integers into floating-point registers, perform a Neon uqadd and move the value back. I don't believe it is a good idea for uadd_sat to do the same, as the scalar alternative is simpler (an adds with a csinv). For signed it may be smaller, but I'm not sure about it being better. So this just adds some extra patterns for the existing vector instructions, matching on the _sat nodes. Differential Revision: https://reviews.llvm.org/D69374
1 parent 62c0746 commit 2179867

File tree

9 files changed

+305
-979
lines changed

9 files changed

+305
-979
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -741,14 +741,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
741741
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
742742
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
743743

744-
// Vector reductions
745744
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
746745
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
746+
// Vector reductions
747747
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
748748
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
749749
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
750750
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
751751
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
752+
753+
// Saturates
754+
setOperationAction(ISD::SADDSAT, VT, Legal);
755+
setOperationAction(ISD::UADDSAT, VT, Legal);
756+
setOperationAction(ISD::SSUBSAT, VT, Legal);
757+
setOperationAction(ISD::USUBSAT, VT, Legal);
752758
}
753759
for (MVT VT : { MVT::v4f16, MVT::v2f32,
754760
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5066,6 +5066,24 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
50665066
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
50675067
}
50685068

5069+
// Extra selection patterns for an existing three-same-vector instruction
// family. For each of the seven integer vector types listed below, a Pat
// matches OpNode applied to two register operands ($LHS, $RHS) and selects the
// instruction record whose name is `inst` concatenated with the type suffix
// (resolved via !cast<Instruction>).
//   inst   - name prefix of the instruction records to select (e.g. "SQADD").
//   OpNode - the pattern operator to match.
// The v8i8, v4i16 and v2i32 patterns use V64 operands; the v16i8, v8i16,
// v4i32 and v2i64 patterns use V128 operands.
multiclass SIMDThreeSameVectorExtraPatterns<string inst, SDPatternOperator OpNode> {
5070+
def : Pat<(v8i8 (OpNode V64:$LHS, V64:$RHS)),
5071+
(!cast<Instruction>(inst#"v8i8") V64:$LHS, V64:$RHS)>;
5072+
def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
5073+
(!cast<Instruction>(inst#"v4i16") V64:$LHS, V64:$RHS)>;
5074+
def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
5075+
(!cast<Instruction>(inst#"v2i32") V64:$LHS, V64:$RHS)>;
5076+
5077+
def : Pat<(v16i8 (OpNode V128:$LHS, V128:$RHS)),
5078+
(!cast<Instruction>(inst#"v16i8") V128:$LHS, V128:$RHS)>;
5079+
def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
5080+
(!cast<Instruction>(inst#"v8i16") V128:$LHS, V128:$RHS)>;
5081+
def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
5082+
(!cast<Instruction>(inst#"v4i32") V128:$LHS, V128:$RHS)>;
5083+
def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
5084+
(!cast<Instruction>(inst#"v2i64") V128:$LHS, V128:$RHS)>;
5085+
}
5086+
50695087
// As above, but D sized elements unsupported.
50705088
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
50715089
SDPatternOperator OpNode> {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3839,6 +3839,12 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
38393839
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
38403840
int_aarch64_neon_sqsub>;
38413841

3842+
// Extra saturating add/sub patterns: in addition to the intrinsic-based
// matches above, also select SQADD, UQADD, SQSUB and UQSUB from the saddsat,
// uaddsat, ssubsat and usubsat nodes respectively.
3843+
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
3844+
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
3845+
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
3846+
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
3847+
38423848
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
38433849
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
38443850
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;

llvm/test/CodeGen/AArch64/sadd_sat.ll

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,7 @@ define i4 @func3(i4 %x, i4 %y) nounwind {
8888
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
8989
; CHECK-LABEL: vec:
9090
; CHECK: // %bb.0:
91-
; CHECK-NEXT: add v2.4s, v0.4s, v1.4s
92-
; CHECK-NEXT: cmlt v4.4s, v2.4s, #0
93-
; CHECK-NEXT: mvni v3.4s, #128, lsl #24
94-
; CHECK-NEXT: cmlt v1.4s, v1.4s, #0
95-
; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s
96-
; CHECK-NEXT: mvn v5.16b, v4.16b
97-
; CHECK-NEXT: bsl v3.16b, v4.16b, v5.16b
98-
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
99-
; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b
91+
; CHECK-NEXT: sqadd v0.4s, v0.4s, v1.4s
10092
; CHECK-NEXT: ret
10193
%tmp = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y);
10294
ret <4 x i32> %tmp;

0 commit comments

Comments
 (0)