
Commit 51e9007
[RISCV] Handle scalable ops with EEW > 2 dests in combineBinOp_VLToVWBinOp_VL
We can remove the restriction that the narrow type needs to be exactly EEW / 2 for scalable ISD::{ADD,SUB,MUL} nodes. This allows us to perform the combine even if we can't fully fold the extend into the widening op. VP intrinsics already do this, since they are lowered to _VL nodes which don't have this restriction.

The "exactly EEW / 2" narrow type restriction also prevented us from emitting V{S,Z}EXT_VL nodes with i1 element types (which crash when we try to select them), since no other legal type is double the size of i1. To preserve this, this patch also restricts the combine to run only after the legalize vector ops phase, at which point all unselectable i1 vectors should have been custom lowered away.
1 parent fbc0d53 · commit 51e9007
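For illustration, a reduced standalone case of the kind this now enables (a hypothetical example, not taken from the patch; the function name is illustrative): i8 -> i32 is an EEW / 4 extend, which the old check rejected outright. The combine can now still narrow the extends to i16 and fold the final doubling into a widening multiply, as the updated tests below show.

; Hypothetical reduced example of an extend the old "exactly EEW / 2" check rejected.
define <vscale x 2 x i32> @mul_sext_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
  %a.ext = sext <vscale x 2 x i8> %a to <vscale x 2 x i32>
  %b.ext = sext <vscale x 2 x i8> %b to <vscale x 2 x i32>
  %mul = mul <vscale x 2 x i32> %a.ext, %b.ext
  ret <vscale x 2 x i32> %mul
}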

5 files changed: +613 −605 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 11 deletions
@@ -13646,16 +13646,6 @@ struct NodeExtensionHelper {
     if (!VT.isVector())
       break;
 
-    SDValue NarrowElt = OrigOperand.getOperand(0);
-    MVT NarrowVT = NarrowElt.getSimpleValueType();
-
-    unsigned ScalarBits = VT.getScalarSizeInBits();
-    unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits();
-
-    // Ensure the extension's semantic is equivalent to rvv vzext or vsext.
-    if (ScalarBits != NarrowScalarBits * 2)
-      break;
-
     SupportsZExt = Opc == ISD::ZERO_EXTEND;
     SupportsSExt = Opc == ISD::SIGN_EXTEND;
 
@@ -14106,7 +14096,9 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
                                            TargetLowering::DAGCombinerInfo &DCI,
                                            const RISCVSubtarget &Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
-  if (DCI.isBeforeLegalize())
+  // Don't perform this until types are legalized and any legal i1 types are
+  // custom lowered to avoid introducing unselectable V{S,Z}EXT_VLs.
+  if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
   if (!NodeExtensionHelper::isSupportedRoot(N))
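For comparison, the VP form of the same computation was already combined before this patch, since vp.sext and vp.mul lower directly to VSEXT_VL and MUL_VL nodes, which never had the EEW / 2 restriction. A sketch, assuming the standard llvm.vp intrinsics (the function name is illustrative):

define <vscale x 2 x i32> @vp_mul_sext_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) {
  ; vp.sext lowers to VSEXT_VL, which combineBinOp_VLToVWBinOp_VL already accepted.
  %a.ext = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i1> %m, i32 %evl)
  %b.ext = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i8(<vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
  %mul = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a.ext, <vscale x 2 x i32> %b.ext, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %mul
}

declare <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)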

llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll

Lines changed: 20 additions & 18 deletions
@@ -283,18 +283,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i8i32_multiple_users(ptr %x, ptr %y,
 ;
 ; FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
 ; FOLDING-NEXT:    vle8.v v8, (a0)
 ; FOLDING-NEXT:    vle8.v v9, (a1)
 ; FOLDING-NEXT:    vle8.v v10, (a2)
-; FOLDING-NEXT:    vsext.vf4 v11, v8
-; FOLDING-NEXT:    vsext.vf4 v8, v9
-; FOLDING-NEXT:    vsext.vf4 v9, v10
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vsext.vf2 v11, v8
+; FOLDING-NEXT:    vsext.vf2 v8, v9
+; FOLDING-NEXT:    vsext.vf2 v9, v10
+; FOLDING-NEXT:    vwmul.vv v10, v11, v8
+; FOLDING-NEXT:    vwadd.vv v8, v11, v9
+; FOLDING-NEXT:    vwsub.vv v12, v11, v9
+; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    vor.vv v8, v8, v12
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i8>, ptr %x
   %b = load <vscale x 2 x i8>, ptr %y
@@ -563,18 +564,19 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y,
 ;
 ; FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
 ; FOLDING-NEXT:    vle8.v v8, (a0)
 ; FOLDING-NEXT:    vle8.v v9, (a1)
 ; FOLDING-NEXT:    vle8.v v10, (a2)
-; FOLDING-NEXT:    vzext.vf4 v11, v8
-; FOLDING-NEXT:    vzext.vf4 v8, v9
-; FOLDING-NEXT:    vzext.vf4 v9, v10
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vzext.vf2 v11, v8
+; FOLDING-NEXT:    vzext.vf2 v8, v9
+; FOLDING-NEXT:    vzext.vf2 v9, v10
+; FOLDING-NEXT:    vwmulu.vv v10, v11, v8
+; FOLDING-NEXT:    vwaddu.vv v8, v11, v9
+; FOLDING-NEXT:    vwsubu.vv v12, v11, v9
+; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    vor.vv v8, v8, v12
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i8>, ptr %x
   %b = load <vscale x 2 x i8>, ptr %y
