Skip to content

Commit ea90466

Browse files
[LLVM][SROA] Teach SROA how to "bitcast" between fixed and scalable vectors. (#130973)
For functions whose vscale_range is limited to a single value, the size of scalable vectors is known at compile time. This aids SROA by allowing scalable vector load and store operations to be considered for replacement, so that bitcasts through memory can be replaced by vector insert or extract operations.
1 parent c59cc2b commit ea90466

File tree

6 files changed

+721
-55
lines changed

6 files changed

+721
-55
lines changed

clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,7 @@ fixed_bool_t from_svbool_t(svbool_t type) {
6262

6363
// CHECK-LABEL: @lax_cast(
6464
// CHECK-NEXT: entry:
65-
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <16 x i32>, align 64
66-
// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
67-
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6:![0-9]+]]
68-
// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 2 x i64>, ptr [[SAVED_VALUE]], align 64, !tbaa [[TBAA6]]
65+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 4 x i32> [[TYPE_COERCE:%.*]] to <vscale x 2 x i64>
6966
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP0]]
7067
//
7168
svint64_t lax_cast(fixed_int32_t type) {
@@ -74,18 +71,18 @@ svint64_t lax_cast(fixed_int32_t type) {
7471

7572
// CHECK-LABEL: @to_svint32_t__from_gnu_int32_t(
7673
// CHECK-NEXT: entry:
77-
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
78-
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
79-
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
74+
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2:![0-9]+]]
75+
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
76+
// CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
8077
//
8178
svint32_t to_svint32_t__from_gnu_int32_t(gnu_int32_t type) {
8279
return type;
8380
}
8481

8582
// CHECK-LABEL: @from_svint32_t__to_gnu_int32_t(
8683
// CHECK-NEXT: entry:
87-
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
88-
// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
84+
// CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE:%.*]], i64 0)
85+
// CHECK-NEXT: store <16 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
8986
// CHECK-NEXT: ret void
9087
//
9188
gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
@@ -94,9 +91,9 @@ gnu_int32_t from_svint32_t__to_gnu_int32_t(svint32_t type) {
9491

9592
// CHECK-LABEL: @to_fixed_int32_t__from_gnu_int32_t(
9693
// CHECK-NEXT: entry:
97-
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA6]]
98-
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
99-
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE]]
94+
// CHECK-NEXT: [[TYPE:%.*]] = load <16 x i32>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA2]]
95+
// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[TYPE]], i64 0)
96+
// CHECK-NEXT: ret <vscale x 4 x i32> [[CAST_SCALABLE]]
10097
//
10198
fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
10299
return type;
@@ -105,7 +102,7 @@ fixed_int32_t to_fixed_int32_t__from_gnu_int32_t(gnu_int32_t type) {
105102
// CHECK-LABEL: @from_fixed_int32_t__to_gnu_int32_t(
106103
// CHECK-NEXT: entry:
107104
// CHECK-NEXT: [[TYPE:%.*]] = tail call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TYPE_COERCE:%.*]], i64 0)
108-
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA6]]
105+
// CHECK-NEXT: store <16 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 16, !tbaa [[TBAA2]]
109106
// CHECK-NEXT: ret void
110107
//
111108
gnu_int32_t from_fixed_int32_t__to_gnu_int32_t(fixed_int32_t type) {

llvm/include/llvm/IR/Function.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,6 +1053,10 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
10531053
/// defined.
10541054
void setAlignment(MaybeAlign Align) { GlobalObject::setAlignment(Align); }
10551055

1056+
/// Return the value for vscale based on the vscale_range attribute or 0 when
1057+
/// unknown.
1058+
unsigned getVScaleValue() const;
1059+
10561060
private:
10571061
void allocHungoffUselist();
10581062
template<int Idx> void setHungoffOperand(Constant *C);

llvm/lib/IR/Function.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,18 @@ bool Function::nullPointerIsDefined() const {
11651165
return hasFnAttribute(Attribute::NullPointerIsValid);
11661166
}
11671167

1168+
unsigned Function::getVScaleValue() const {
1169+
Attribute Attr = getFnAttribute(Attribute::VScaleRange);
1170+
if (!Attr.isValid())
1171+
return 0;
1172+
1173+
unsigned VScale = Attr.getVScaleRangeMin();
1174+
if (VScale && VScale == Attr.getVScaleRangeMax())
1175+
return VScale;
1176+
1177+
return 0;
1178+
}
1179+
11681180
bool llvm::NullPointerIsDefined(const Function *F, unsigned AS) {
11691181
if (F && F->nullPointerIsDefined())
11701182
return true;

0 commit comments

Comments
 (0)