Skip to content

Commit 016eca8

Browse files
committed
[RISCV] Guard LowerINSERT_VECTOR_ELT against fixed vectors.
The type legalizer can call this code based on the scalar type so we need to verify the vector type is a scalable vector. I think due to how type legalization visits nodes, the vector type will have already been legalized so we don't have an issue with using MVT here like we did for EXTRACT_VECTOR_ELT. I've added a test just in case.
1 parent 58ecfcc commit 016eca8

File tree

2 files changed

+130
-1
lines changed

2 files changed

+130
-1
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1816,7 +1816,7 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
18161816
// first slid down into position, the value is inserted into the first
18171817
// position, and the vector is slid back up. We do this to simplify patterns.
18181818
// (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
1819-
if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
1819+
if (Subtarget.is64Bit() || Val.getValueType() != MVT::i64) {
18201820
if (isNullConstant(Idx))
18211821
return Op;
18221822
SDValue Mask, VL;
@@ -1831,6 +1831,9 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
18311831
Mask, VL);
18321832
}
18331833

1834+
if (!VecVT.isScalableVector())
1835+
return SDValue();
1836+
18341837
// Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
18351838
// is illegal (currently only vXi64 RV32).
18361839
// Since there is no easy way of getting a single element into a vector when
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32
3+
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64
4+
5+
; FIXME: This codegen needs to be improved. These tests previously asserted
6+
; while type legalizing the i64 type on RV32.
7+
8+
define void @insertelt_v4i64(<4 x i64>* %x, i64 %y) {
9+
; RV32-LABEL: insertelt_v4i64:
10+
; RV32: # %bb.0:
11+
; RV32-NEXT: addi sp, sp, -224
12+
; RV32-NEXT: .cfi_def_cfa_offset 224
13+
; RV32-NEXT: sw ra, 220(sp) # 4-byte Folded Spill
14+
; RV32-NEXT: sw s0, 216(sp) # 4-byte Folded Spill
15+
; RV32-NEXT: .cfi_offset ra, -4
16+
; RV32-NEXT: .cfi_offset s0, -8
17+
; RV32-NEXT: addi s0, sp, 224
18+
; RV32-NEXT: .cfi_def_cfa s0, 0
19+
; RV32-NEXT: andi sp, sp, -32
20+
; RV32-NEXT: addi a3, zero, 8
21+
; RV32-NEXT: vsetvli a4, a3, e32,m2,ta,mu
22+
; RV32-NEXT: vle32.v v26, (a0)
23+
; RV32-NEXT: vse32.v v26, (sp)
24+
; RV32-NEXT: addi a6, zero, 2
25+
; RV32-NEXT: addi a5, sp, 16
26+
; RV32-NEXT: vsetvli a4, a6, e32,m1,ta,mu
27+
; RV32-NEXT: vle32.v v25, (a5)
28+
; RV32-NEXT: addi a4, sp, 112
29+
; RV32-NEXT: vse32.v v25, (a4)
30+
; RV32-NEXT: addi a4, sp, 8
31+
; RV32-NEXT: vle32.v v25, (a4)
32+
; RV32-NEXT: addi a4, sp, 104
33+
; RV32-NEXT: vse32.v v25, (a4)
34+
; RV32-NEXT: sw a2, 128(sp)
35+
; RV32-NEXT: vsetvli a2, a3, e32,m2,ta,mu
36+
; RV32-NEXT: addi a2, sp, 128
37+
; RV32-NEXT: vle32.v v26, (a2)
38+
; RV32-NEXT: addi a2, sp, 64
39+
; RV32-NEXT: vse32.v v26, (a2)
40+
; RV32-NEXT: sw a1, 160(sp)
41+
; RV32-NEXT: addi a1, sp, 160
42+
; RV32-NEXT: vle32.v v26, (a1)
43+
; RV32-NEXT: addi a1, sp, 32
44+
; RV32-NEXT: vse32.v v26, (a1)
45+
; RV32-NEXT: vsetvli a1, a6, e32,m1,ta,mu
46+
; RV32-NEXT: vle32.v v25, (sp)
47+
; RV32-NEXT: addi a1, sp, 96
48+
; RV32-NEXT: vse32.v v25, (a1)
49+
; RV32-NEXT: lw a1, 64(sp)
50+
; RV32-NEXT: sw a1, 124(sp)
51+
; RV32-NEXT: lw a1, 32(sp)
52+
; RV32-NEXT: sw a1, 120(sp)
53+
; RV32-NEXT: vsetvli a1, a3, e32,m2,ta,mu
54+
; RV32-NEXT: addi a1, sp, 96
55+
; RV32-NEXT: vle32.v v26, (a1)
56+
; RV32-NEXT: vse32.v v26, (a0)
57+
; RV32-NEXT: addi sp, s0, -224
58+
; RV32-NEXT: lw s0, 216(sp) # 4-byte Folded Reload
59+
; RV32-NEXT: lw ra, 220(sp) # 4-byte Folded Reload
60+
; RV32-NEXT: addi sp, sp, 224
61+
; RV32-NEXT: ret
62+
;
63+
; RV64-LABEL: insertelt_v4i64:
64+
; RV64: # %bb.0:
65+
; RV64-NEXT: addi sp, sp, -160
66+
; RV64-NEXT: .cfi_def_cfa_offset 160
67+
; RV64-NEXT: sd ra, 152(sp) # 8-byte Folded Spill
68+
; RV64-NEXT: sd s0, 144(sp) # 8-byte Folded Spill
69+
; RV64-NEXT: .cfi_offset ra, -8
70+
; RV64-NEXT: .cfi_offset s0, -16
71+
; RV64-NEXT: addi s0, sp, 160
72+
; RV64-NEXT: .cfi_def_cfa s0, 0
73+
; RV64-NEXT: andi sp, sp, -32
74+
; RV64-NEXT: addi a2, zero, 4
75+
; RV64-NEXT: vsetvli a3, a2, e64,m2,ta,mu
76+
; RV64-NEXT: vle64.v v26, (a0)
77+
; RV64-NEXT: vse64.v v26, (sp)
78+
; RV64-NEXT: sd a1, 96(sp)
79+
; RV64-NEXT: addi a1, sp, 96
80+
; RV64-NEXT: vle64.v v26, (a1)
81+
; RV64-NEXT: addi a1, sp, 32
82+
; RV64-NEXT: vse64.v v26, (a1)
83+
; RV64-NEXT: addi a1, zero, 2
84+
; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
85+
; RV64-NEXT: vle64.v v25, (sp)
86+
; RV64-NEXT: addi a1, sp, 64
87+
; RV64-NEXT: vse64.v v25, (a1)
88+
; RV64-NEXT: ld a1, 16(sp)
89+
; RV64-NEXT: sd a1, 80(sp)
90+
; RV64-NEXT: ld a1, 32(sp)
91+
; RV64-NEXT: sd a1, 88(sp)
92+
; RV64-NEXT: vsetvli a1, a2, e64,m2,ta,mu
93+
; RV64-NEXT: addi a1, sp, 64
94+
; RV64-NEXT: vle64.v v26, (a1)
95+
; RV64-NEXT: vse64.v v26, (a0)
96+
; RV64-NEXT: addi sp, s0, -160
97+
; RV64-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
98+
; RV64-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
99+
; RV64-NEXT: addi sp, sp, 160
100+
; RV64-NEXT: ret
101+
%a = load <4 x i64>, <4 x i64>* %x
102+
%b = insertelement <4 x i64> %a, i64 %y, i32 3
103+
store <4 x i64> %b, <4 x i64>* %x
104+
ret void
105+
}
106+
107+
; This uses a non-power of 2 type so that it isn't an MVT.
108+
; The align keeps the type legalizer from using a 256 bit load so we must split
109+
; it. This hits some operations that weren't supported for scalable vectors when
110+
; this test was written.
111+
define void @insertelt_v3i64(<3 x i64>* %x, i64 %y) {
112+
; RV32-LABEL: insertelt_v3i64:
113+
; RV32: # %bb.0:
114+
; RV32-NEXT: sw a1, 16(a0)
115+
; RV32-NEXT: sw a2, 20(a0)
116+
; RV32-NEXT: ret
117+
;
118+
; RV64-LABEL: insertelt_v3i64:
119+
; RV64: # %bb.0:
120+
; RV64-NEXT: sd a1, 16(a0)
121+
; RV64-NEXT: ret
122+
%a = load <3 x i64>, <3 x i64>* %x, align 8
123+
%b = insertelement <3 x i64> %a, i64 %y, i32 2
124+
store <3 x i64> %b, <3 x i64>* %x
125+
ret void
126+
}

0 commit comments

Comments
 (0)