@@ -78,67 +78,13 @@ define void @test1(ptr %p, ptr noalias %s, i32 %stride) {
78
78
; CHECK-NEXT: entry:
79
79
; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
80
80
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
81
- ; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
82
81
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
83
- ; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
84
- ; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
85
82
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
86
- ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
87
- ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
88
- ; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
89
- ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
90
- ; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
91
- ; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
92
- ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
93
- ; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
94
- ; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
95
- ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
96
- ; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
97
- ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
98
- ; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
99
- ; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
100
- ; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
101
- ; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
102
- ; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
103
- ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
104
- ; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
105
- ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
106
- ; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
107
- ; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
108
- ; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
109
- ; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
110
- ; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
111
- ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
112
- ; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
113
- ; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
114
- ; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
115
- ; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
116
- ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
117
- ; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
118
- ; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
119
- ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
120
- ; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
121
- ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
122
- ; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
123
- ; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
124
- ; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
125
- ; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
126
- ; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
127
- ; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
128
- ; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
129
- ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
130
- ; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
131
- ; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
132
- ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
133
- ; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
134
- ; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
135
- ; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
136
- ; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
137
- ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
138
- ; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
139
- ; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
140
- ; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
141
- ; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
83
+ ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STR]], 4
84
+ ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
85
+ ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
86
+ ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]]
87
+ ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
142
88
; CHECK-NEXT: ret void
143
89
;
144
90
entry:
@@ -215,38 +161,12 @@ define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
215
161
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
216
162
; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
217
163
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
218
- ; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
219
164
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
220
- ; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
221
- ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
222
- ; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
223
- ; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
224
- ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
225
- ; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
226
- ; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
227
- ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
228
- ; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
229
- ; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
230
- ; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
231
- ; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
232
- ; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
233
- ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
234
- ; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
235
- ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
236
- ; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
237
- ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 0
238
- ; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
239
165
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
240
- ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[I1]], i32 0
241
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I3]], i32 1
242
- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I5]], i32 2
243
- ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I7]], i32 3
244
- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I9]], i32 4
245
- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I11]], i32 5
246
- ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I13]], i32 6
247
- ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP7]], float [[I15]], i32 7
248
- ; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <8 x float> [[TMP8]], [[TMP0]]
249
- ; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[ARRAYIDX2]], align 4
166
+ ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STR]], -4
167
+ ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
168
+ ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
169
+ ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
250
170
; CHECK-NEXT: ret void
251
171
;
252
172
entry:
0 commit comments