|
6 | 6 |
|
7 | 7 | target triple = "x86_64-unknown-linux-gnu"
|
8 | 8 |
|
9 |
| -define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { |
| 9 | +define internal fastcc void @no_promote_avx2(ptr %arg, ptr readonly %arg1) #0 { |
10 | 10 | ; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
|
11 | 11 | ; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2
|
12 |
| -; CHECK-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] { |
| 12 | +; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], ptr noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] { |
13 | 13 | ; CHECK-NEXT: bb:
|
14 |
| -; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 |
15 |
| -; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 |
| 14 | +; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, ptr [[ARG1]], align 32 |
| 15 | +; CHECK-NEXT: store <4 x i64> [[TMP]], ptr [[ARG]], align 32 |
16 | 16 | ; CHECK-NEXT: ret void
|
17 | 17 | ;
|
18 | 18 | bb:
|
19 |
| - %tmp = load <4 x i64>, <4 x i64>* %arg1 |
20 |
| - store <4 x i64> %tmp, <4 x i64>* %arg |
| 19 | + %tmp = load <4 x i64>, ptr %arg1 |
| 20 | + store <4 x i64> %tmp, ptr %arg |
21 | 21 | ret void
|
22 | 22 | }
|
23 | 23 |
|
24 |
| -define void @no_promote(<4 x i64>* %arg) #1 { |
| 24 | +define void @no_promote(ptr %arg) #1 { |
25 | 25 | ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
|
26 | 26 | ; TUNIT-LABEL: define {{[^@]+}}@no_promote
|
27 |
| -; TUNIT-SAME: (<4 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { |
| 27 | +; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { |
28 | 28 | ; TUNIT-NEXT: bb:
|
29 | 29 | ; TUNIT-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
|
30 | 30 | ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
|
31 |
| -; TUNIT-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* |
32 |
| -; TUNIT-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3:[0-9]+]] |
33 |
| -; TUNIT-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) #[[ATTR4:[0-9]+]] |
34 |
| -; TUNIT-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 |
35 |
| -; TUNIT-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 |
| 31 | +; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3:[0-9]+]] |
| 32 | +; TUNIT-NEXT: call fastcc void @no_promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) #[[ATTR4:[0-9]+]] |
| 33 | +; TUNIT-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32 |
| 34 | +; TUNIT-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2 |
36 | 35 | ; TUNIT-NEXT: ret void
|
37 | 36 | ;
|
38 | 37 | ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) uwtable
|
39 | 38 | ; CGSCC-LABEL: define {{[^@]+}}@no_promote
|
40 |
| -; CGSCC-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { |
| 39 | +; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { |
41 | 40 | ; CGSCC-NEXT: bb:
|
42 | 41 | ; CGSCC-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
|
43 | 42 | ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
|
44 |
| -; CGSCC-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* |
45 |
| -; CGSCC-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3:[0-9]+]] |
46 |
| -; CGSCC-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) #[[ATTR4:[0-9]+]] |
47 |
| -; CGSCC-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 |
48 |
| -; CGSCC-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 |
| 43 | +; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3:[0-9]+]] |
| 44 | +; CGSCC-NEXT: call fastcc void @no_promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) #[[ATTR4:[0-9]+]] |
| 45 | +; CGSCC-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32 |
| 46 | +; CGSCC-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2 |
49 | 47 | ; CGSCC-NEXT: ret void
|
50 | 48 | ;
|
51 | 49 | bb:
|
52 | 50 | %tmp = alloca <4 x i64>, align 32
|
53 | 51 | %tmp2 = alloca <4 x i64>, align 32
|
54 |
| - %tmp3 = bitcast <4 x i64>* %tmp to i8* |
55 |
| - call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) |
56 |
| - call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) |
57 |
| - %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32 |
58 |
| - store <4 x i64> %tmp4, <4 x i64>* %arg, align 2 |
| 52 | + call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false) |
| 53 | + call fastcc void @no_promote_avx2(ptr %tmp2, ptr %tmp) |
| 54 | + %tmp4 = load <4 x i64>, ptr %tmp2, align 32 |
| 55 | + store <4 x i64> %tmp4, ptr %arg, align 2 |
59 | 56 | ret void
|
60 | 57 | }
|
61 | 58 |
|
62 |
| -define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { |
| 59 | +define internal fastcc void @promote_avx2(ptr %arg, ptr readonly %arg1) #0 { |
63 | 60 | ; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
|
64 | 61 | ; CHECK-LABEL: define {{[^@]+}}@promote_avx2
|
65 |
| -; CHECK-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] { |
| 62 | +; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] { |
66 | 63 | ; CHECK-NEXT: bb:
|
67 | 64 | ; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32
|
68 |
| -; CHECK-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32 |
69 |
| -; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32 |
70 |
| -; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 |
| 65 | +; CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 32 |
| 66 | +; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, ptr [[ARG1_PRIV]], align 32 |
| 67 | +; CHECK-NEXT: store <4 x i64> [[TMP]], ptr [[ARG]], align 32 |
71 | 68 | ; CHECK-NEXT: ret void
|
72 | 69 | ;
|
73 | 70 | bb:
|
74 |
| - %tmp = load <4 x i64>, <4 x i64>* %arg1 |
75 |
| - store <4 x i64> %tmp, <4 x i64>* %arg |
| 71 | + %tmp = load <4 x i64>, ptr %arg1 |
| 72 | + store <4 x i64> %tmp, ptr %arg |
76 | 73 | ret void
|
77 | 74 | }
|
78 | 75 |
|
79 |
| -define void @promote(<4 x i64>* %arg) #0 { |
| 76 | +define void @promote(ptr %arg) #0 { |
80 | 77 | ; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
|
81 | 78 | ; TUNIT-LABEL: define {{[^@]+}}@promote
|
82 |
| -; TUNIT-SAME: (<4 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] { |
| 79 | +; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] { |
83 | 80 | ; TUNIT-NEXT: bb:
|
84 | 81 | ; TUNIT-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
|
85 | 82 | ; TUNIT-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
|
86 |
| -; TUNIT-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* |
87 |
| -; TUNIT-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]] |
88 |
| -; TUNIT-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32 |
89 |
| -; TUNIT-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]] |
90 |
| -; TUNIT-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 |
91 |
| -; TUNIT-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 |
| 83 | +; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]] |
| 84 | +; TUNIT-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[TMP]], align 32 |
| 85 | +; TUNIT-NEXT: call fastcc void @promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]] |
| 86 | +; TUNIT-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32 |
| 87 | +; TUNIT-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2 |
92 | 88 | ; TUNIT-NEXT: ret void
|
93 | 89 | ;
|
94 | 90 | ; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
|
95 | 91 | ; CGSCC-LABEL: define {{[^@]+}}@promote
|
96 |
| -; CGSCC-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR0]] { |
| 92 | +; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR0]] { |
97 | 93 | ; CGSCC-NEXT: bb:
|
98 | 94 | ; CGSCC-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
|
99 | 95 | ; CGSCC-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
|
100 |
| -; CGSCC-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* |
101 |
| -; CGSCC-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]] |
102 |
| -; CGSCC-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32 |
103 |
| -; CGSCC-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]] |
104 |
| -; CGSCC-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 |
105 |
| -; CGSCC-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 |
| 96 | +; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]] |
| 97 | +; CGSCC-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[TMP]], align 32 |
| 98 | +; CGSCC-NEXT: call fastcc void @promote_avx2(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]] |
| 99 | +; CGSCC-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32 |
| 100 | +; CGSCC-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2 |
106 | 101 | ; CGSCC-NEXT: ret void
|
107 | 102 | ;
|
108 | 103 | bb:
|
109 | 104 | %tmp = alloca <4 x i64>, align 32
|
110 | 105 | %tmp2 = alloca <4 x i64>, align 32
|
111 |
| - %tmp3 = bitcast <4 x i64>* %tmp to i8* |
112 |
| - call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) |
113 |
| - call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp) |
114 |
| - %tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32 |
115 |
| - store <4 x i64> %tmp4, <4 x i64>* %arg, align 2 |
| 106 | + call void @llvm.memset.p0.i64(ptr align 32 %tmp, i8 0, i64 32, i1 false) |
| 107 | + call fastcc void @promote_avx2(ptr %tmp2, ptr %tmp) |
| 108 | + %tmp4 = load <4 x i64>, ptr %tmp2, align 32 |
| 109 | + store <4 x i64> %tmp4, ptr %arg, align 2 |
116 | 110 | ret void
|
117 | 111 | }
|
118 | 112 |
|
119 | 113 | ; Function Attrs: argmemonly nounwind
|
120 |
| -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2 |
| 114 | +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #2 |
121 | 115 |
|
122 | 116 | attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
|
123 | 117 | attributes #1 = { nounwind uwtable }
|
|
0 commit comments