Skip to content

Commit e027e04

Browse files
authored
[DAGCombiner] Don't let scalarizeBinOpOfSplats create illegal scalar MULHS/MULHU (#104518)
Type legalization lacks generic support for these operations. They are normally only created when the type is legal. This scalarization case is new. We could update type legalization, but there are some corner cases that make it not straightforward. For example, if the promoted type isn't 2x the narrow type, we need to over-promote. Fixes #104480
1 parent 5c3a3dc commit e027e04

File tree

2 files changed

+97
-0
lines changed

2 files changed

+97
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27140,6 +27140,10 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
2714027140
: TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)))
2714127141
return SDValue();
2714227142

27143+
// FIXME: Type legalization can't handle illegal MULHS/MULHU.
27144+
if ((Opcode == ISD::MULHS || Opcode == ISD::MULHU) && !TLI.isTypeLegal(EltVT))
27145+
return SDValue();
27146+
2714327147
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
2714427148
SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
2714527149
SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v | FileCheck %s
3+
4+
define <vscale x 4 x i16> @test_mulhs_promote(<vscale x 4 x i16> %broadcast.splatinsert, <vscale x 4 x i1> %0, <vscale x 4 x i1> %1) {
5+
; CHECK-LABEL: test_mulhs_promote:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
8+
; CHECK-NEXT: vrgather.vi v9, v8, 0
9+
; CHECK-NEXT: lui a0, 5
10+
; CHECK-NEXT: addi a0, a0, 1366
11+
; CHECK-NEXT: vmulh.vx v8, v9, a0
12+
; CHECK-NEXT: vsrl.vi v10, v8, 15
13+
; CHECK-NEXT: vadd.vv v8, v8, v10
14+
; CHECK-NEXT: li a0, 3
15+
; CHECK-NEXT: vnmsub.vx v8, a0, v9
16+
; CHECK-NEXT: ret
17+
entry:
18+
%broadcast.splat = shufflevector <vscale x 4 x i16> %broadcast.splatinsert, <vscale x 4 x i16> zeroinitializer, <vscale x 4 x i32> zeroinitializer
19+
%2 = srem <vscale x 4 x i16> %broadcast.splat, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 3, i64 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
20+
ret <vscale x 4 x i16> %2
21+
}
22+
23+
define <vscale x 4 x i16> @test_mulhu_promote(<vscale x 4 x i16> %broadcast.splatinsert, <vscale x 4 x i1> %0, <vscale x 4 x i1> %1) {
24+
; CHECK-LABEL: test_mulhu_promote:
25+
; CHECK: # %bb.0: # %entry
26+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
27+
; CHECK-NEXT: vrgather.vi v9, v8, 0
28+
; CHECK-NEXT: lui a0, 1048571
29+
; CHECK-NEXT: addi a0, a0, -1365
30+
; CHECK-NEXT: vmulhu.vx v8, v9, a0
31+
; CHECK-NEXT: vsrl.vi v8, v8, 1
32+
; CHECK-NEXT: li a0, 3
33+
; CHECK-NEXT: vnmsub.vx v8, a0, v9
34+
; CHECK-NEXT: ret
35+
entry:
36+
%broadcast.splat = shufflevector <vscale x 4 x i16> %broadcast.splatinsert, <vscale x 4 x i16> zeroinitializer, <vscale x 4 x i32> zeroinitializer
37+
%2 = urem <vscale x 4 x i16> %broadcast.splat, shufflevector (<vscale x 4 x i16> insertelement (<vscale x 4 x i16> poison, i16 3, i64 0), <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer)
38+
ret <vscale x 4 x i16> %2
39+
}
40+
41+
define <vscale x 4 x i64> @test_mulhs_expand(<vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i1> %0, <vscale x 4 x i1> %1) {
42+
; CHECK-LABEL: test_mulhs_expand:
43+
; CHECK: # %bb.0: # %entry
44+
; CHECK-NEXT: addi sp, sp, -16
45+
; CHECK-NEXT: .cfi_def_cfa_offset 16
46+
; CHECK-NEXT: lui a0, 349525
47+
; CHECK-NEXT: addi a1, a0, 1365
48+
; CHECK-NEXT: sw a1, 12(sp)
49+
; CHECK-NEXT: addi a0, a0, 1366
50+
; CHECK-NEXT: sw a0, 8(sp)
51+
; CHECK-NEXT: addi a0, sp, 8
52+
; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
53+
; CHECK-NEXT: vlse64.v v12, (a0), zero
54+
; CHECK-NEXT: vrgather.vi v16, v8, 0
55+
; CHECK-NEXT: vmulh.vv v8, v16, v12
56+
; CHECK-NEXT: li a0, 63
57+
; CHECK-NEXT: vsrl.vx v12, v8, a0
58+
; CHECK-NEXT: vadd.vv v8, v8, v12
59+
; CHECK-NEXT: li a0, 3
60+
; CHECK-NEXT: vnmsub.vx v8, a0, v16
61+
; CHECK-NEXT: addi sp, sp, 16
62+
; CHECK-NEXT: ret
63+
entry:
64+
%broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i32> zeroinitializer
65+
%2 = srem <vscale x 4 x i64> %broadcast.splat, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
66+
ret <vscale x 4 x i64> %2
67+
}
68+
69+
define <vscale x 4 x i64> @test_mulhu_expand(<vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i1> %0, <vscale x 4 x i1> %1) {
70+
; CHECK-LABEL: test_mulhu_expand:
71+
; CHECK: # %bb.0: # %entry
72+
; CHECK-NEXT: addi sp, sp, -16
73+
; CHECK-NEXT: .cfi_def_cfa_offset 16
74+
; CHECK-NEXT: lui a0, 699051
75+
; CHECK-NEXT: addi a1, a0, -1366
76+
; CHECK-NEXT: sw a1, 12(sp)
77+
; CHECK-NEXT: addi a0, a0, -1365
78+
; CHECK-NEXT: sw a0, 8(sp)
79+
; CHECK-NEXT: addi a0, sp, 8
80+
; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
81+
; CHECK-NEXT: vlse64.v v12, (a0), zero
82+
; CHECK-NEXT: vrgather.vi v16, v8, 0
83+
; CHECK-NEXT: vmulhu.vv v8, v16, v12
84+
; CHECK-NEXT: vsrl.vi v8, v8, 1
85+
; CHECK-NEXT: li a0, 3
86+
; CHECK-NEXT: vnmsub.vx v8, a0, v16
87+
; CHECK-NEXT: addi sp, sp, 16
88+
; CHECK-NEXT: ret
89+
entry:
90+
%broadcast.splat = shufflevector <vscale x 4 x i64> %broadcast.splatinsert, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i32> zeroinitializer
91+
%2 = urem <vscale x 4 x i64> %broadcast.splat, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
92+
ret <vscale x 4 x i64> %2
93+
}

0 commit comments

Comments
 (0)