Skip to content

Commit db836f6

Browse files
authored
[RISCV] Narrow vector absolute value (#82041)
If we have an abs(sext a), we can legally perform this as zext(abs a). This holds even for the minimum signed value: the narrow abs wraps back to INT_MIN, whose bit pattern, when zero-extended, is exactly the magnitude the wide abs would have produced. (See the same combine in InstCombine; note that the IntMinIsPoison flag doesn't exist in SelectionDAG yet.) On RVV, this is likely profitable because it may allow us to perform the arithmetic operations involved in the abs at a narrower LMUL before widening for the user. We could arguably avoid narrowing below DLEN, but the transform should at worst move the extend around and create one extra vsetvli toggle if the source could previously be handled via loads with an explicit EEW.
1 parent a8fdc93 commit db836f6

File tree

2 files changed

+32
-16
lines changed

2 files changed

+32
-16
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1417,7 +1417,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
14171417
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
14181418
ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
14191419
ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1420-
ISD::INSERT_VECTOR_ELT});
1420+
ISD::INSERT_VECTOR_ELT, ISD::ABS});
14211421
if (Subtarget.hasVendorXTHeadMemPair())
14221422
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
14231423
if (Subtarget.useRVVForFixedLengthVectors())
@@ -15611,6 +15611,19 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1561115611
return DAG.getNode(ISD::AND, DL, VT, NewFMV,
1561215612
DAG.getConstant(~SignBit, DL, VT));
1561315613
}
15614+
case ISD::ABS: {
15615+
EVT VT = N->getValueType(0);
15616+
SDValue N0 = N->getOperand(0);
15617+
// abs (sext) -> zext (abs)
15618+
// abs (zext) -> zext (handled elsewhere)
15619+
if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
15620+
SDValue Src = N0.getOperand(0);
15621+
SDLoc DL(N);
15622+
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
15623+
DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
15624+
}
15625+
break;
15626+
}
1561415627
case ISD::ADD: {
1561515628
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
1561615629
return V;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,13 @@ declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
152152
define void @abs_v4i64_of_sext_v4i8(ptr %x) {
153153
; CHECK-LABEL: abs_v4i64_of_sext_v4i8:
154154
; CHECK: # %bb.0:
155-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
155+
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
156156
; CHECK-NEXT: vle8.v v8, (a0)
157-
; CHECK-NEXT: vsext.vf8 v10, v8
158-
; CHECK-NEXT: vrsub.vi v8, v10, 0
159-
; CHECK-NEXT: vmax.vv v8, v10, v8
160-
; CHECK-NEXT: vse64.v v8, (a0)
157+
; CHECK-NEXT: vrsub.vi v9, v8, 0
158+
; CHECK-NEXT: vmax.vv v8, v8, v9
159+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
160+
; CHECK-NEXT: vzext.vf8 v10, v8
161+
; CHECK-NEXT: vse64.v v10, (a0)
161162
; CHECK-NEXT: ret
162163
%a = load <4 x i8>, ptr %x
163164
%a.ext = sext <4 x i8> %a to <4 x i64>
@@ -169,12 +170,13 @@ define void @abs_v4i64_of_sext_v4i8(ptr %x) {
169170
define void @abs_v4i64_of_sext_v4i16(ptr %x) {
170171
; CHECK-LABEL: abs_v4i64_of_sext_v4i16:
171172
; CHECK: # %bb.0:
172-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
173+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
173174
; CHECK-NEXT: vle16.v v8, (a0)
174-
; CHECK-NEXT: vsext.vf4 v10, v8
175-
; CHECK-NEXT: vrsub.vi v8, v10, 0
176-
; CHECK-NEXT: vmax.vv v8, v10, v8
177-
; CHECK-NEXT: vse64.v v8, (a0)
175+
; CHECK-NEXT: vrsub.vi v9, v8, 0
176+
; CHECK-NEXT: vmax.vv v8, v8, v9
177+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
178+
; CHECK-NEXT: vzext.vf4 v10, v8
179+
; CHECK-NEXT: vse64.v v10, (a0)
178180
; CHECK-NEXT: ret
179181
%a = load <4 x i16>, ptr %x
180182
%a.ext = sext <4 x i16> %a to <4 x i64>
@@ -186,12 +188,13 @@ define void @abs_v4i64_of_sext_v4i16(ptr %x) {
186188
define void @abs_v4i64_of_sext_v4i32(ptr %x) {
187189
; CHECK-LABEL: abs_v4i64_of_sext_v4i32:
188190
; CHECK: # %bb.0:
189-
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
191+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
190192
; CHECK-NEXT: vle32.v v8, (a0)
191-
; CHECK-NEXT: vsext.vf2 v10, v8
192-
; CHECK-NEXT: vrsub.vi v8, v10, 0
193-
; CHECK-NEXT: vmax.vv v8, v10, v8
194-
; CHECK-NEXT: vse64.v v8, (a0)
193+
; CHECK-NEXT: vrsub.vi v9, v8, 0
194+
; CHECK-NEXT: vmax.vv v8, v8, v9
195+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
196+
; CHECK-NEXT: vzext.vf2 v10, v8
197+
; CHECK-NEXT: vse64.v v10, (a0)
195198
; CHECK-NEXT: ret
196199
%a = load <4 x i32>, ptr %x
197200
%a.ext = sext <4 x i32> %a to <4 x i64>

0 commit comments

Comments
 (0)