[clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts
This change makes use of the llvm.vector.extract intrinsic to avoid
going through memory when performing bitcasts between vector-length
agnostic types and vector-length specific types.
Depends on D91362
Reviewed By: c-rhodes
Differential Revision: https://reviews.llvm.org/D92761
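For context, here is a minimal sketch (not taken from the patch) of the kind of fixed-length SVE code the modified tests exercise; the exact test source, type names, and flags below are assumptions. Passing a fixed-length (VLS) vector to an SVE intrinsic and returning its scalable (VLA) result requires a VLST -> VLAT cast on the way in and a VLAT -> VLST cast on the way out; the removed CHECK lines show those casts previously round-tripping through stack memory (alloca/store/load), and the added CHECK lines show them lowering to llvm.experimental.vector.insert/extract instead.

// Hedged sketch of a test function shaped like the @f2 checked below.
// Assumes compilation with SVE enabled and a fixed vector length selected,
// e.g. -target-feature +sve -msve-vector-bits=128 (or 256, 512, ...), so that
// __ARM_FEATURE_SVE_BITS is defined. Names such as fixed_int8_t are illustrative.
#include <arm_sve.h>

#define N __ARM_FEATURE_SVE_BITS

// Fixed-length (VLS) vector type of N bits, implicitly convertible
// to and from the sizeless (VLA) type svint8_t.
typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));

fixed_int8_t f2(fixed_int8_t x) {
  // x (VLST) converts to svint8_t (VLAT) at the intrinsic call, and the
  // svint8_t result converts back to the fixed-length return type. With this
  // change both conversions become llvm.experimental.vector.insert /
  // llvm.experimental.vector.extract calls rather than a store/load round trip.
  return svasrd_x(svptrue_b8(), x, 1);
}

The two check prefixes in the diff reflect the ABI difference visible in the checked IR: with 128-bit vectors (CHECK128) the fixed-length value is passed and returned directly as <16 x i8>, while for wider -msve-vector-bits values (CHECK) the result is returned indirectly via the sret %agg.result pointer, hence the extra load and store in that block.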
 // CHECK128-LABEL: define <16 x i8> @f2(<16 x i8> %x)
 // CHECK128-NEXT: entry:
-// CHECK128-NEXT: %x.addr = alloca <16 x i8>, align 16
-// CHECK128-NEXT: %saved-call-rvalue = alloca <vscale x 16 x i8>, align 16
-// CHECK128-NEXT: store <16 x i8> %x, <16 x i8>* %x.addr, align 16
-// CHECK128-NEXT: %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK128-NEXT: %1 = bitcast <16 x i8>* %x.addr to <vscale x 16 x i8>*
-// CHECK128-NEXT: %2 = load <vscale x 16 x i8>, <vscale x 16 x i8>* %1, align 16
-// CHECK128-NEXT: %3 = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %2, i32 1)
-// CHECK128-NEXT: store <vscale x 16 x i8> %3, <vscale x 16 x i8>* %saved-call-rvalue, align 16
-// CHECK128-NEXT: %castFixedSve = bitcast <vscale x 16 x i8>* %saved-call-rvalue to <16 x i8>*
-// CHECK128-NEXT: %4 = load <16 x i8>, <16 x i8>* %castFixedSve, align 16
-// CHECK128-NEXT: ret <16 x i8> %4
+// CHECK128-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[X:%.*]], i64 0)
+// CHECK128-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK128-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK128-NEXT: ret <16 x i8> [[CASTFIXEDSVE]]

 // CHECK-LABEL: define void @f2(
 // CHECK-SAME: <[[#div(VBITS,8)]] x i8>* noalias nocapture sret(<[[#div(VBITS,8)]] x i8>) align 16 %agg.result, <[[#div(VBITS,8)]] x i8>* nocapture readonly %0)
-// CHECK-NEXT: entry:
-// CHECK-NEXT: %x.addr = alloca <[[#div(VBITS,8)]] x i8>, align 16
-// CHECK-NEXT: %saved-call-rvalue = alloca <vscale x 16 x i8>, align 16
-// CHECK-NEXT: %x = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* %0, align 16
-// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> %x, <[[#div(VBITS,8)]] x i8>* %x.addr, align 16
-// CHECK-NEXT: %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT: %2 = bitcast <[[#div(VBITS,8)]] x i8>* %x.addr to <vscale x 16 x i8>*
-// CHECK-NEXT: %3 = load <vscale x 16 x i8>, <vscale x 16 x i8>* %2, align 16
-// CHECK-NEXT: %4 = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %3, i32 1)
-// CHECK-NEXT: store <vscale x 16 x i8> %4, <vscale x 16 x i8>* %saved-call-rvalue, align 16
-// CHECK-NEXT: %castFixedSve = bitcast <vscale x 16 x i8>* %saved-call-rvalue to <[[#div(VBITS,8)]] x i8>*
-// CHECK-NEXT: %5 = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* %castFixedSve, align 16
-// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> %5, <[[#div(VBITS,8)]] x i8>* %agg.result, align 16
-// CHECK-NEXT: ret void
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> @llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], <[[#div(VBITS,8)]] x i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]]