Skip to content

Commit 487a823

Browse files
[LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors.
For function whose vscale_range is limited to a single value we can size scalable vectors. This aids SROA by allowing scalable vector load and store operations to be considered for replacement whereby bitcasts through memory can be replaced by vector insert or extract operations.
1 parent cd043e4 commit 487a823

File tree

11 files changed

+563
-71
lines changed

11 files changed

+563
-71
lines changed

clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,15 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_
2626
//
2727
// CHECK-128-LABEL: @call_bool32_ff(
2828
// CHECK-128-NEXT: entry:
29+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
30+
// CHECK-128-NEXT: [[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
2931
// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 2 x i1>, align 1
3032
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
31-
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2_COERCE:%.*]], i64 4)
32-
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]]
33-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]]
33+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
34+
// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
35+
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], <vscale x 2 x i1> [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 4)
36+
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA9:![0-9]+]]
37+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]]
3438
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
3539
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
3640
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
@@ -52,11 +56,15 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) {
5256
//
5357
// CHECK-128-LABEL: @call_bool64_ff(
5458
// CHECK-128-NEXT: entry:
59+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
60+
// CHECK-128-NEXT: [[SAVED_VALUE3:%.*]] = alloca <1 x i8>, align 1
5561
// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca <vscale x 1 x i1>, align 1
5662
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
57-
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2_COERCE:%.*]], i64 2)
63+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 1 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
64+
// CHECK-128-NEXT: [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_:%.*]] = load <vscale x 1 x i1>, ptr [[SAVED_VALUE3]], align 1, !tbaa [[TBAA6]]
65+
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], <vscale x 1 x i1> [[SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_SAVED_VALUE3_0_]], i64 2)
5866
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]]
59-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]]
67+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6]]
6068
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
6169
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
6270
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
@@ -82,11 +90,13 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) {
8290
//
8391
// CHECK-128-LABEL: @call_bool32_fs(
8492
// CHECK-128-NEXT: entry:
93+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
8594
// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 2 x i1>, align 1
8695
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
87-
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1_COERCE:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
88-
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
89-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
96+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
97+
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
98+
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA9]]
99+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
90100
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
91101
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
92102
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
@@ -108,11 +118,13 @@ fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) {
108118
//
109119
// CHECK-128-LABEL: @call_bool64_fs(
110120
// CHECK-128-NEXT: entry:
121+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
111122
// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 1 x i1>, align 1
112123
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
113-
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1_COERCE:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
124+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 1 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
125+
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
114126
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]]
115-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]]
127+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]]
116128
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
117129
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
118130
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]
@@ -141,8 +153,8 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) {
141153
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
142154
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
143155
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[OP2:%.*]], i64 4)
144-
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
145-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
156+
// CHECK-128-NEXT: store <vscale x 2 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9]]
157+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
146158
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
147159
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
148160
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TMP2]]
@@ -168,7 +180,7 @@ fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) {
168180
// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 1 x i1>, align 1
169181
// CHECK-128-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[OP2:%.*]], i64 2)
170182
// CHECK-128-NEXT: store <vscale x 1 x i1> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]]
171-
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
183+
// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]]
172184
// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1
173185
// CHECK-128-NEXT: [[TMP2:%.*]] = load <vscale x 1 x i1>, ptr [[RETVAL_COERCE]], align 1
174186
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TMP2]]

clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) {
8080
//
8181
// CHECK-128-LABEL: @to_vbool32_t(
8282
// CHECK-128-NEXT: entry:
83-
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[TYPE_COERCE:%.*]]
83+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
84+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
85+
// CHECK-128-NEXT: ret <vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
8486
//
8587
vbool32_t to_vbool32_t(fixed_bool32_t type) {
8688
return type;
@@ -116,7 +118,9 @@ fixed_bool64_t from_vbool64_t(vbool64_t type) {
116118
//
117119
// CHECK-128-LABEL: @to_vbool64_t(
118120
// CHECK-128-NEXT: entry:
119-
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[TYPE_COERCE:%.*]]
121+
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
122+
// CHECK-128-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 1 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
123+
// CHECK-128-NEXT: ret <vscale x 1 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
120124
//
121125
vbool64_t to_vbool64_t(fixed_bool64_t type) {
122126
return type;

clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) {
9999
// CHECK-NEXT: entry:
100100
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 2 x i1>, align 1
101101
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 2 x i1>, align 1
102-
// CHECK-NEXT: store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]]
103-
// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]]
102+
// CHECK-NEXT: store <vscale x 2 x i1> [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]]
103+
// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10:![0-9]+]]
104104
// CHECK-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1
105105
// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 2 x i1>, ptr [[RETVAL_COERCE]], align 1
106106
// CHECK-NEXT: ret <vscale x 2 x i1> [[TMP1]]
@@ -111,15 +111,17 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) {
111111

112112
// CHECK-LABEL: @to_vbool32_t(
113113
// CHECK-NEXT: entry:
114-
// CHECK-NEXT: ret <vscale x 2 x i1> [[TYPE_COERCE:%.*]]
114+
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1
115+
// CHECK-NEXT: [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_:%.*]] = load <vscale x 2 x i1>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]]
116+
// CHECK-NEXT: ret <vscale x 2 x i1> [[SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_SAVED_VALUE_0_]]
115117
//
116118
vbool32_t to_vbool32_t(fixed_bool32_t type) {
117119
return type;
118120
}
119121

120122
// CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t(
121123
// CHECK-NEXT: entry:
122-
// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]]
124+
// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA10]]
123125
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TYPE]], i64 0)
124126
// CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
125127
//
@@ -130,7 +132,7 @@ vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
130132
// CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t(
131133
// CHECK-NEXT: entry:
132134
// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE:%.*]], i64 0)
133-
// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]]
135+
// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA10]]
134136
// CHECK-NEXT: ret void
135137
//
136138
gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) {
@@ -139,7 +141,7 @@ gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) {
139141

140142
// CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t(
141143
// CHECK-NEXT: entry:
142-
// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]]
144+
// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA10]]
143145
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> poison, <8 x i32> [[TYPE]], i64 0)
144146
// CHECK-NEXT: ret <vscale x 2 x i32> [[CAST_SCALABLE]]
145147
//
@@ -150,7 +152,7 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) {
150152
// CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t(
151153
// CHECK-NEXT: entry:
152154
// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TYPE_COERCE:%.*]], i64 0)
153-
// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]]
155+
// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA10]]
154156
// CHECK-NEXT: ret void
155157
//
156158
gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) {

clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,7 @@ fixed_bool_t from_svbool_t(svbool_t type) {
6262

6363
// CHECK-LABEL: @lax_cast(
6464
// CHECK-NEXT: entry:
65-
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <16 x i32>, align 64
66-
// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
67-
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6:![0-9]+]]
68-
// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6]]
65+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 4 x i32> [[TYPE_COERCE:%.*]] to <vscale x 2 x i64>
6966
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
7067
//
7168
svint64_t lax_cast(fixed_int32_t type) {
@@ -74,18 +71,18 @@ svint64_t lax_cast(fixed_int32_t type) {
7471

7572
// CHECK-LABEL: @to_svint32_t__from_gnu_int32_t(
7673
// CHECK-NEXT: entry:
77-
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
78-
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
79-
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
74+
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2:![0-9]+]]
75+
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
76+
// CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
8077
//
8178
svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) {
8279
return type;
8380
}
8481

8582
// CHECK-LABEL: @from_svint32_t__to_gnu_int32_t(
8683
// CHECK-NEXT: entry:
87-
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
88-
// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
84+
// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
85+
// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
8986
// CHECK-NEXT: ret void
9087
//
9188
gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
@@ -94,9 +91,9 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
9491

9592
// CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t(
9693
// CHECK-NEXT: entry:
97-
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
98-
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
99-
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
94+
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]]
95+
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
96+
// CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
10097
//
10198
fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
10299
return type;
@@ -105,7 +102,7 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
105102
// CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t(
106103
// CHECK-NEXT: entry:
107104
// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
108-
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
105+
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
109106
// CHECK-NEXT: ret void
110107
//
111108
gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) {

llvm/include/llvm/IR/Attributes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,10 @@ class Attribute {
276276
/// when unknown.
277277
std::optional<unsigned> getVScaleRangeMax() const;
278278

279+
/// Return the value for vscale based on the vscale_range attribute or 0 when
280+
/// unknown.
281+
unsigned getVScaleValue() const;
282+
279283
// Returns the unwind table kind.
280284
UWTableKind getUWTableKind() const;
281285

llvm/include/llvm/IR/DerivedTypes.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,22 @@ class VectorType : public Type {
554554
return VectorType::get(VTy->getElementType(), EltCnt * 2);
555555
}
556556

557+
/// This static method returns a VectorType with the same size-in-bits as
558+
/// SizeTy but with an element type that matches the scalar type of EltTy.
559+
static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) {
560+
if (SizeTy->getScalarType() == EltTy->getScalarType())
561+
return SizeTy;
562+
563+
unsigned EltSize = EltTy->getScalarSizeInBits();
564+
if (!SizeTy->getPrimitiveSizeInBits().isKnownMultipleOf(EltSize))
565+
return nullptr;
566+
567+
ElementCount EC = SizeTy->getElementCount()
568+
.multiplyCoefficientBy(SizeTy->getScalarSizeInBits())
569+
.divideCoefficientBy(EltSize);
570+
return VectorType::get(EltTy->getScalarType(), EC);
571+
}
572+
557573
/// Return true if the specified type is valid as a element type.
558574
static bool isValidElementType(Type *ElemTy);
559575

llvm/lib/IR/AttributeImpl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ class AttributeSetNode final
343343
const;
344344
unsigned getVScaleRangeMin() const;
345345
std::optional<unsigned> getVScaleRangeMax() const;
346+
unsigned getVScaleValue() const;
346347
UWTableKind getUWTableKind() const;
347348
AllocFnKind getAllocKind() const;
348349
MemoryEffects getMemoryEffects() const;

llvm/lib/IR/Attributes.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,14 @@ std::optional<unsigned> Attribute::getVScaleRangeMax() const {
473473
return unpackVScaleRangeArgs(pImpl->getValueAsInt()).second;
474474
}
475475

476+
unsigned Attribute::getVScaleValue() const {
477+
std::optional<unsigned> VScale = getVScaleRangeMax();
478+
if (VScale && *VScale == getVScaleRangeMin())
479+
return *VScale;
480+
481+
return 0;
482+
}
483+
476484
UWTableKind Attribute::getUWTableKind() const {
477485
assert(hasAttribute(Attribute::UWTable) &&
478486
"Trying to get unwind table kind from non-uwtable attribute");

0 commit comments

Comments
 (0)