@@ -299,6 +299,51 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2
299
299
ret <8 x i16 > %r
300
300
}
301
301
302
; Negative test: the load is accessed through a bitcast of a byte GEP, so the
; scalar load + insert is expected to be left alone (offset 1 is not a valid
; i32 element boundary and alignment is only 1).
define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) {
; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 1
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32*
; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 1
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 1
  %b = bitcast i8* %gep to i32*
  %s = load i32, i32* %b, align 1
  %r = insertelement <4 x i32> undef, i32 %s, i64 0
  ret <4 x i32> %r
}
317
; Byte offset 12 with dereferenceable(20): loading i32 needs bytes [12,16),
; which is within bounds. The test pins current (unchanged) output: the scalar
; load + insert sequence is preserved as-is.
define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 12
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32*
; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 1
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 12
  %b = bitcast i8* %gep to i32*
  %s = load i32, i32* %b, align 1
  %r = insertelement <4 x i32> undef, i32 %s, i64 0
  ret <4 x i32> %r
}
332
; Byte offset 13 with dereferenceable(20): loading i32 needs bytes [13,17),
; still within the 20 dereferenceable bytes but not 4-byte aligned. Expected
; output pins the scalar load + insert sequence unchanged.
define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13
; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[GEP]] to i32*
; CHECK-NEXT: [[S:%.*]] = load i32, i32* [[B]], align 1
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
; CHECK-NEXT: ret <4 x i32> [[R]]
;
  %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 13
  %b = bitcast i8* %gep to i32*
  %s = load i32, i32* %b, align 1
  %r = insertelement <4 x i32> undef, i32 %s, i64 0
  ret <4 x i32> %r
}
302
347
; If there are enough dereferenceable bytes, we can offset the vector load.
303
348
304
349
define <8 x i16 > @gep10_load_i16_insert_v8i16 (<8 x i16 >* align 16 dereferenceable (32 ) %p ) {
0 commit comments