@@ -19,14 +19,12 @@ for.cond: ; preds = %for.body, %entry
19
19
br i1 %cmp , label %for.body , label %for.end
20
20
21
21
for.body: ; preds = %for.cond
22
- %0 = bitcast ptr %a to ptr
23
- %1 = bitcast <4 x i32 > %s.coerce.fca.0 .extract to <16 x i8 >
24
- %2 = bitcast <4 x i32 > %s.coerce.fca.1.extract to <16 x i8 >
22
+ %0 = bitcast < 4 x i32 > %s.coerce.fca.0.extract to < 16 x i8 >
23
+ %1 = bitcast <4 x i32 > %s.coerce.fca.1 .extract to <16 x i8 >
24
+ %2 = bitcast <16 x i8 > %0 to <4 x i32 >
25
25
%3 = bitcast <16 x i8 > %1 to <4 x i32 >
26
- %4 = bitcast <16 x i8 > %2 to <4 x i32 >
27
- call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %3 , <4 x i32 > %4 , ptr %0 )
28
- %5 = bitcast ptr %a to ptr
29
- %vld2 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %5 )
26
+ call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %2 , <4 x i32 > %3 , ptr %a )
27
+ %vld2 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %a )
30
28
%vld2.fca.0.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld2 , 0
31
29
%vld2.fca.1.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld2 , 1
32
30
%call = call <4 x i32 > @vaddq_s32 (<4 x i32 > %vld2.fca.0.extract , <4 x i32 > %vld2.fca.0.extract )
@@ -54,15 +52,13 @@ for.cond: ; preds = %for.body, %entry
54
52
br i1 %cmp , label %for.body , label %for.end
55
53
56
54
for.body: ; preds = %for.cond
57
- %0 = bitcast ptr %a to ptr
58
- %1 = bitcast <4 x i32 > %s.coerce.fca.0 .extract to <16 x i8 >
59
- %2 = bitcast <4 x i32 > %s.coerce.fca.1.extract to <16 x i8 >
55
+ %0 = bitcast < 4 x i32 > %s.coerce.fca.0.extract to < 16 x i8 >
56
+ %1 = bitcast <4 x i32 > %s.coerce.fca.1 .extract to <16 x i8 >
57
+ %2 = bitcast <16 x i8 > %0 to <4 x i32 >
60
58
%3 = bitcast <16 x i8 > %1 to <4 x i32 >
61
- %4 = bitcast <16 x i8 > %2 to <4 x i32 >
62
- call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %3 , <4 x i32 > %3 , ptr %0 )
63
- call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %3 , <4 x i32 > %4 , ptr %0 )
64
- %5 = bitcast ptr %a to ptr
65
- %vld2 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %5 )
59
+ call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %2 , <4 x i32 > %2 , ptr %a )
60
+ call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %2 , <4 x i32 > %3 , ptr %a )
61
+ %vld2 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %a )
66
62
%vld2.fca.0.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld2 , 0
67
63
%vld2.fca.1.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld2 , 1
68
64
%call = call <4 x i32 > @vaddq_s32 (<4 x i32 > %vld2.fca.0.extract , <4 x i32 > %vld2.fca.0.extract )
@@ -90,12 +86,10 @@ for.cond: ; preds = %for.body, %entry
90
86
br i1 %cmp , label %for.body , label %for.end
91
87
92
88
for.body: ; preds = %for.cond
93
- %0 = bitcast ptr %a to ptr
94
- %vld2 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %0 )
89
+ %vld2 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %a )
95
90
%vld2.fca.0.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld2 , 0
96
91
%vld2.fca.1.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld2 , 1
97
- %1 = bitcast ptr %a to ptr
98
- %vld22 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %1 )
92
+ %vld22 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %a )
99
93
%vld22.fca.0.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld22 , 0
100
94
%vld22.fca.1.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld22 , 1
101
95
%call = call <4 x i32 > @vaddq_s32 (<4 x i32 > %vld2.fca.0.extract , <4 x i32 > %vld22.fca.0.extract )
@@ -124,15 +118,13 @@ for.cond: ; preds = %for.body, %entry
124
118
br i1 %cmp , label %for.body , label %for.end
125
119
126
120
for.body: ; preds = %for.cond
127
- %0 = bitcast ptr %a to ptr
128
- %1 = bitcast <4 x i32 > %s.coerce.fca.0 .extract to <16 x i8 >
129
- %2 = bitcast <4 x i32 > %s.coerce.fca.1.extract to <16 x i8 >
121
+ %0 = bitcast < 4 x i32 > %s.coerce.fca.0.extract to < 16 x i8 >
122
+ %1 = bitcast <4 x i32 > %s.coerce.fca.1 .extract to <16 x i8 >
123
+ %2 = bitcast <16 x i8 > %0 to <4 x i32 >
130
124
%3 = bitcast <16 x i8 > %1 to <4 x i32 >
131
- %4 = bitcast <16 x i8 > %2 to <4 x i32 >
132
- call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %3 , <4 x i32 > %4 , ptr %0 )
125
+ call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %2 , <4 x i32 > %3 , ptr %a )
133
126
store i32 0 , ptr %b , align 4
134
- %5 = bitcast ptr %a to ptr
135
- %vld2 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %5 )
127
+ %vld2 = call { <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld2.v4i32.p0 (ptr %a )
136
128
%vld2.fca.0.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld2 , 0
137
129
%vld2.fca.1.extract = extractvalue { <4 x i32 >, <4 x i32 > } %vld2 , 1
138
130
%call = call <4 x i32 > @vaddq_s32 (<4 x i32 > %vld2.fca.0.extract , <4 x i32 > %vld2.fca.0.extract )
@@ -160,14 +152,12 @@ for.cond: ; preds = %for.body, %entry
160
152
br i1 %cmp , label %for.body , label %for.end
161
153
162
154
for.body: ; preds = %for.cond
163
- %0 = bitcast ptr %a to ptr
164
- %1 = bitcast <4 x i32 > %s.coerce.fca.0 .extract to <16 x i8 >
165
- %2 = bitcast <4 x i32 > %s.coerce.fca.1.extract to <16 x i8 >
155
+ %0 = bitcast < 4 x i32 > %s.coerce.fca.0.extract to < 16 x i8 >
156
+ %1 = bitcast <4 x i32 > %s.coerce.fca.1 .extract to <16 x i8 >
157
+ %2 = bitcast <16 x i8 > %0 to <4 x i32 >
166
158
%3 = bitcast <16 x i8 > %1 to <4 x i32 >
167
- %4 = bitcast <16 x i8 > %2 to <4 x i32 >
168
- call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %3 , <4 x i32 > %4 , ptr %0 )
169
- %5 = bitcast ptr %a to ptr
170
- %vld3 = call { <4 x i32 >, <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld3.v4i32.p0 (ptr %5 )
159
+ call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %2 , <4 x i32 > %3 , ptr %a )
160
+ %vld3 = call { <4 x i32 >, <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld3.v4i32.p0 (ptr %a )
171
161
%vld3.fca.0.extract = extractvalue { <4 x i32 >, <4 x i32 >, <4 x i32 > } %vld3 , 0
172
162
%vld3.fca.2.extract = extractvalue { <4 x i32 >, <4 x i32 >, <4 x i32 > } %vld3 , 2
173
163
%call = call <4 x i32 > @vaddq_s32 (<4 x i32 > %vld3.fca.0.extract , <4 x i32 > %vld3.fca.2.extract )
@@ -196,15 +186,13 @@ for.cond: ; preds = %for.body, %entry
196
186
br i1 %cmp , label %for.body , label %for.end
197
187
198
188
for.body: ; preds = %for.cond
199
- %0 = bitcast ptr %a to ptr
200
- %1 = bitcast <4 x i32 > %s.coerce.fca.0 .extract to <16 x i8 >
201
- %2 = bitcast <4 x i32 > %s.coerce.fca.1.extract to <16 x i8 >
189
+ %0 = bitcast < 4 x i32 > %s.coerce.fca.0.extract to < 16 x i8 >
190
+ %1 = bitcast <4 x i32 > %s.coerce.fca.1 .extract to <16 x i8 >
191
+ %2 = bitcast <16 x i8 > %0 to <4 x i32 >
202
192
%3 = bitcast <16 x i8 > %1 to <4 x i32 >
203
- %4 = bitcast <16 x i8 > %2 to <4 x i32 >
204
- call void @llvm.aarch64.neon.st3.v4i32.p0 (<4 x i32 > %4 , <4 x i32 > %3 , <4 x i32 > %3 , ptr %0 )
205
- call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %3 , <4 x i32 > %3 , ptr %0 )
206
- %5 = bitcast ptr %a to ptr
207
- %vld3 = call { <4 x i32 >, <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld3.v4i32.p0 (ptr %5 )
193
+ call void @llvm.aarch64.neon.st3.v4i32.p0 (<4 x i32 > %3 , <4 x i32 > %2 , <4 x i32 > %2 , ptr %a )
194
+ call void @llvm.aarch64.neon.st2.v4i32.p0 (<4 x i32 > %2 , <4 x i32 > %2 , ptr %a )
195
+ %vld3 = call { <4 x i32 >, <4 x i32 >, <4 x i32 > } @llvm.aarch64.neon.ld3.v4i32.p0 (ptr %a )
208
196
%vld3.fca.0.extract = extractvalue { <4 x i32 >, <4 x i32 >, <4 x i32 > } %vld3 , 0
209
197
%vld3.fca.1.extract = extractvalue { <4 x i32 >, <4 x i32 >, <4 x i32 > } %vld3 , 1
210
198
%call = call <4 x i32 > @vaddq_s32 (<4 x i32 > %vld3.fca.0.extract , <4 x i32 > %vld3.fca.0.extract )
0 commit comments