Skip to content

Commit 605b478

Browse files
[LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors.
For function whose vscale_range is limited to a single value we can size scalable vectors. This aids SROA by allowing scalable vector load and store operations to be considered for replacement whereby bitcasts through memory can be replaced by vector insert or extract operations.
1 parent 32a2805 commit 605b478

File tree

9 files changed

+240
-127
lines changed

9 files changed

+240
-127
lines changed

clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_
2626
//
2727
// CHECK-128-LABEL: @call_bool32_ff(
2828
// CHECK-128-NEXT: entry:
29+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
30+
// CHECK-128-NEXT: [[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
2931
// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
3032
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
31-
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 4)
32-
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
33-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
33+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
34+
// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
35+
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], <vscale x 2 x i1> [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4)
36+
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA9:![0-9]+]]
37+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]]
3438
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
3539
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
3640
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
@@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
5256
//
5357
// CHECK-128-LABEL: @call_bool64_ff(
5458
// CHECK-128-NEXT: entry:
59+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
60+
// CHECK-128-NEXT: [[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
5561
// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
5662
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
57-
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 2)
63+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 1 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
64+
// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load <vscale x 1 x i1>, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
65+
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], <vscale x 1 x i1> [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2)
5866
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
59-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
67+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]]
6068
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
6169
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
6270
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
@@ -82,11 +90,13 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
8290
//
8391
// CHECK-128-LABEL: @call_bool32_fs(
8492
// CHECK-128-NEXT: entry:
93+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
8594
// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
8695
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
87-
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
88-
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
89-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
96+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
97+
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
98+
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA9]]
99+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
90100
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
91101
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
92102
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
@@ -108,11 +118,13 @@ fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) {
108118
//
109119
// CHECK-128-LABEL: @call_bool64_fs(
110120
// CHECK-128-NEXT: entry:
121+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
111122
// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
112123
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
113-
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
124+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 1 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
125+
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
114126
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
115-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
127+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
116128
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
117129
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
118130
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
@@ -141,8 +153,8 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
141153
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
142154
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
143155
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
144-
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
145-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
156+
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9]]
157+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
146158
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
147159
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
148160
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
@@ -168,7 +180,7 @@ fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) {
168180
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
169181
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
170182
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
171-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
183+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
172184
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
173185
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
174186
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]

clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) {
8080
//
8181
// CHECK-128-LABEL: @to_vbool32_t(
8282
// CHECK-128-NEXT: entry:
83-
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TYPE_COERCE:%.*]]
83+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
84+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
85+
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
8486
//
8587
vbool32_t to_vbool32_t(fixed_bool32_t type) {
8688
return type;
@@ -116,7 +118,9 @@ fixed_bool64_t from_vbool64_t(vbool64_t type) {
116118
//
117119
// CHECK-128-LABEL: @to_vbool64_t(
118120
// CHECK-128-NEXT: entry:
119-
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TYPE_COERCE:%.*]]
121+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
122+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 1 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
123+
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
120124
//
121125
vbool64_t to_vbool64_t(fixed_bool64_t type) {
122126
return type;

clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) {
9999
// CHECK-NEXT: entry:
100100
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
101101
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
102-
// CHECK-NEXT: store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]]
103-
// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]]
102+
// CHECK-NEXT: store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
103+
// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10:![0-9]+]]
104104
// CHECK-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1
105105
// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
106106
// CHECK-NEXT: ret <vscale x 2 x i1> [[TMP1]]
@@ -111,15 +111,17 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) {
111111

112112
// CHECK-LABEL: @to_vbool32_t(
113113
// CHECK-NEXT: entry:
114-
// CHECK-NEXT: ret <vscale x 2 x i1> [[TYPE_COERCE:%.*]]
114+
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
115+
// CHECK-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
116+
// CHECK-NEXT: ret <vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
115117
//
116118
vbool32_t to_vbool32_t(fixed_bool32_t type) {
117119
return type;
118120
}
119121

120122
// CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t(
121123
// CHECK-NEXT: entry:
122-
// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]]
124+
// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA10]]
123125
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TYPE]], i64 0)
124126
// CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
125127
//
@@ -130,7 +132,7 @@ vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
130132
// CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t(
131133
// CHECK-NEXT: entry:
132134
// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE:%.*]], i64 0)
133-
// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]]
135+
// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA10]]
134136
// CHECK-NEXT: ret void
135137
//
136138
gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) {
@@ -139,7 +141,7 @@ gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) {
139141

140142
// CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t(
141143
// CHECK-NEXT: entry:
142-
// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]]
144+
// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA10]]
143145
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TYPE]], i64 0)
144146
// CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
145147
//
@@ -150,7 +152,7 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
150152
// CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t(
151153
// CHECK-NEXT: entry:
152154
// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE_COERCE:%.*]], i64 0)
153-
// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]]
155+
// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA10]]
154156
// CHECK-NEXT: ret void
155157
//
156158
gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) {

clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,7 @@ fixed_bool_t from_svbool_t(svbool_t type) {
6262

6363
// CHECK-LABEL: @lax_cast(
6464
// CHECK-NEXT: entry:
65-
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <16 x i32>, align 64
66-
// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
67-
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6:![0-9]+]]
68-
// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6]]
65+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 4 x i32> [[TYPE_COERCE:%.*]] to <vscale x 2 x i64>
6966
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
7067
//
7168
svint64_t lax_cast(fixed_int32_t type) {
@@ -74,18 +71,18 @@ svint64_t lax_cast(fixed_int32_t type) {
7471

7572
// CHECK-LABEL: @to_svint32_t__from_gnu_int32_t(
7673
// CHECK-NEXT: entry:
77-
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
78-
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
79-
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
74+
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2:![0-9]+]]
75+
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
76+
// CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
8077
//
8178
svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) {
8279
return type;
8380
}
8481

8582
// CHECK-LABEL: @from_svint32_t__to_gnu_int32_t(
8683
// CHECK-NEXT: entry:
87-
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
88-
// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
84+
// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
85+
// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
8986
// CHECK-NEXT: ret void
9087
//
9188
gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
@@ -94,9 +91,9 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
9491

9592
// CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t(
9693
// CHECK-NEXT: entry:
97-
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
98-
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
99-
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
94+
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]]
95+
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
96+
// CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
10097
//
10198
fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
10299
return type;
@@ -105,7 +102,7 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
105102
// CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t(
106103
// CHECK-NEXT: entry:
107104
// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
108-
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
105+
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
109106
// CHECK-NEXT: ret void
110107
//
111108
gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) {

llvm/include/llvm/IR/DerivedTypes.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,22 @@ class VectorType : public Type {
554554
return VectorType::get(VTy->getElementType(), EltCnt * 2);
555555
}
556556

557+
/// This static method returns a VectorType with the same size-in-bits as
558+
/// SizeTy but with an element type that matches the scalar type of EltTy.
559+
static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {
560+
if (SizeTy->getScalarType() == EltTy->getScalarType())
561+
return SizeTy;
562+
563+
unsigned EltSize = EltTy->getScalarSizeInBits();
564+
if (!SizeTy->getPrimitiveSizeInBits().isKnownMultipleOf(EltSize))
565+
return nullptr;
566+
567+
ElementCount EC = SizeTy->getElementCount()
568+
.multiplyCoefficientBy(SizeTy->getScalarSizeInBits())
569+
.divideCoefficientBy(EltSize);
570+
return VectorType::get(EltTy->getScalarType(), EC);
571+
}
572+
557573
/// Return true if the specified type is valid as a element type.
558574
static bool isValidElementType(Type *ElemTy);
559575

llvm/include/llvm/IR/Function.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,10 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
10361036
/// Return value: true => null pointer dereference is not undefined.
10371037
bool nullPointerIsDefined() const;
10381038

1039+
/// Return the value for vscale based on the vscale_range attribute or 0 when
1040+
/// unknown.
1041+
unsigned getVScaleValue() const;
1042+
10391043
private:
10401044
void allocHungoffUselist();
10411045
template<int Idx> void setHungoffOperand(Constant *C);

llvm/lib/IR/Function.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,6 +1168,15 @@ bool Function::nullPointerIsDefined() const {
11681168
return hasFnAttribute(Attribute::NullPointerIsValid);
11691169
}
11701170

1171+
unsigned Function::getVScaleValue() const {
1172+
Attribute Attr = getFnAttribute(Attribute::VScaleRange);
1173+
if (!Attr.isValid())
1174+
return 0;
1175+
1176+
unsigned VScale = Attr.getVScaleRangeMax().value_or(0);
1177+
return VScale == Attr.getVScaleRangeMin() ? VScale : 0;
1178+
}
1179+
11711180
bool llvm::NullPointerIsDefined(const Function *F, unsigned AS) {
11721181
if (F && F->nullPointerIsDefined())
11731182
return true;

0 commit comments

Comments
 (0)