Skip to content

Commit c8ebc7a

Browse files
committed
add test-cases
1 parent 0f7b3a9 commit c8ebc7a

File tree

2 files changed

+212
-4
lines changed

2 files changed

+212
-4
lines changed

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
306306
; must be a multiple of element size.
307307
; TODO: Could bitcast around this limitation.
308308

309-
define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(16) %p) nofree nosync {
310-
; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
309+
define <4 x i32> @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
310+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
311311
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
312312
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
313313
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
@@ -319,6 +319,84 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
319319
ret <4 x i32> %r
320320
}
321321

322+
; Scalar i64 load through a GEP to element 1 of <16 x i8> (byte offset 1 from
; %p), inserted into lane 0 of a poison <2 x i64>. %p is declared align 1,
; dereferenceable(16). Offset 1 is not a multiple of the 8-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <2 x i64> @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
323+
; CHECK-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
324+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
325+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
326+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
327+
; CHECK-NEXT: ret <2 x i64> [[R]]
328+
;
329+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 1
330+
%s = load i64, ptr %gep, align 1
331+
%r = insertelement <2 x i64> poison, i64 %s, i64 0
332+
ret <2 x i64> %r
333+
}
334+
335+
; Scalar i32 load through a GEP to element 11 of <16 x i8> (byte offset 11 from
; %p), inserted into lane 0 of a poison <4 x i32>. The 4-byte load covers bytes
; 11..14, which lies inside the dereferenceable(16) range declared on %p, but
; offset 11 is not a multiple of the 4-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <4 x i32> @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
336+
; CHECK-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
337+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
338+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
339+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
340+
; CHECK-NEXT: ret <4 x i32> [[R]]
341+
;
342+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 11
343+
%s = load i32, ptr %gep, align 1
344+
%r = insertelement <4 x i32> poison, i32 %s, i64 0
345+
ret <4 x i32> %r
346+
}
347+
348+
; Scalar i32 load through a GEP to element 1 of <8 x i16> (byte offset 2 from
; %p, with 2-byte i16 elements), inserted into lane 0 of a poison <4 x i32>.
; %p is declared align 1, dereferenceable(16). Offset 2 is not a multiple of
; the 4-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <4 x i32> @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
349+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
350+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
351+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
352+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
353+
; CHECK-NEXT: ret <4 x i32> [[R]]
354+
;
355+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
356+
%s = load i32, ptr %gep, align 1
357+
%r = insertelement <4 x i32> poison, i32 %s, i64 0
358+
ret <4 x i32> %r
359+
}
360+
361+
; Scalar i64 load through a GEP to element 1 of <8 x i16> (byte offset 2 from
; %p, with 2-byte i16 elements), inserted into lane 0 of a poison <2 x i64>.
; %p is declared align 1, dereferenceable(16). Offset 2 is not a multiple of
; the 8-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <2 x i64> @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
362+
; CHECK-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
363+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
364+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
365+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
366+
; CHECK-NEXT: ret <2 x i64> [[R]]
367+
;
368+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
369+
%s = load i64, ptr %gep, align 1
370+
%r = insertelement <2 x i64> poison, i64 %s, i64 0
371+
ret <2 x i64> %r
372+
}
373+
374+
; Scalar i32 load through a GEP to element 5 of <8 x i16> (byte offset 10 from
; %p, with 2-byte i16 elements), inserted into lane 0 of a poison <4 x i32>.
; The 4-byte load covers bytes 10..13, inside the dereferenceable(16) range,
; but offset 10 is not a multiple of the 4-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <4 x i32> @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
375+
; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
376+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
377+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
378+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
379+
; CHECK-NEXT: ret <4 x i32> [[R]]
380+
;
381+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 5
382+
%s = load i32, ptr %gep, align 1
383+
%r = insertelement <4 x i32> poison, i32 %s, i64 0
384+
ret <4 x i32> %r
385+
}
386+
387+
; Scalar i64 load through a GEP to element 1 of <4 x i32> (byte offset 4 from
; %p, with 4-byte i32 elements), inserted into lane 0 of a poison <2 x i64>.
; Offset 4 is not a multiple of the 8-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
; NOTE(review): the function name says "load_i32" but the body loads i64 --
; confirm whether the name or the load type is the intended one.
; NOTE(review): this function carries `nofree nosync`, while the other
; "_from_" tests added next to it do not -- confirm this is deliberate.
define <2 x i64> @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(ptr align 1 dereferenceable(16) %p) nofree nosync {
388+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(
389+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 1
390+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
391+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
392+
; CHECK-NEXT: ret <2 x i64> [[R]]
393+
;
394+
%gep = getelementptr inbounds <4 x i32>, ptr %p, i64 0, i64 1
395+
%s = load i64, ptr %gep, align 1
396+
%r = insertelement <2 x i64> poison, i64 %s, i64 0
397+
ret <2 x i64> %r
398+
}
399+
322400
define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
323401
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
324402
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1

llvm/test/Transforms/VectorCombine/X86/load.ll

Lines changed: 132 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
289289
; must be a multiple of element size.
290290
; TODO: Could bitcast around this limitation.
291291

292-
define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
293-
; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
292+
define <4 x i32> @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
293+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
294294
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
295295
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
296296
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
@@ -302,6 +302,84 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
302302
ret <4 x i32> %r
303303
}
304304

305+
; Scalar i64 load through a GEP to element 1 of <16 x i8> (byte offset 1 from
; %p), inserted into lane 0 of an undef <2 x i64>. %p is declared align 1,
; dereferenceable(16). Offset 1 is not a multiple of the 8-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <2 x i64> @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
306+
; CHECK-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
307+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
308+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
309+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
310+
; CHECK-NEXT: ret <2 x i64> [[R]]
311+
;
312+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 1
313+
%s = load i64, ptr %gep, align 1
314+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
315+
ret <2 x i64> %r
316+
}
317+
318+
; Scalar i32 load through a GEP to element 11 of <16 x i8> (byte offset 11 from
; %p), inserted into lane 0 of an undef <4 x i32>. The 4-byte load covers bytes
; 11..14, which lies inside the dereferenceable(16) range declared on %p, but
; offset 11 is not a multiple of the 4-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <4 x i32> @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
319+
; CHECK-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
320+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
321+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
322+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
323+
; CHECK-NEXT: ret <4 x i32> [[R]]
324+
;
325+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 11
326+
%s = load i32, ptr %gep, align 1
327+
%r = insertelement <4 x i32> undef, i32 %s, i64 0
328+
ret <4 x i32> %r
329+
}
330+
331+
; Scalar i32 load through a GEP to element 1 of <8 x i16> (byte offset 2 from
; %p, with 2-byte i16 elements), inserted into lane 0 of an undef <4 x i32>.
; %p is declared align 1, dereferenceable(16). Offset 2 is not a multiple of
; the 4-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <4 x i32> @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
332+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
333+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
334+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
335+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
336+
; CHECK-NEXT: ret <4 x i32> [[R]]
337+
;
338+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
339+
%s = load i32, ptr %gep, align 1
340+
%r = insertelement <4 x i32> undef, i32 %s, i64 0
341+
ret <4 x i32> %r
342+
}
343+
344+
; Scalar i64 load through a GEP to element 1 of <8 x i16> (byte offset 2 from
; %p, with 2-byte i16 elements), inserted into lane 0 of an undef <2 x i64>.
; %p is declared align 1, dereferenceable(16). Offset 2 is not a multiple of
; the 8-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <2 x i64> @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
345+
; CHECK-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
346+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
347+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
348+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
349+
; CHECK-NEXT: ret <2 x i64> [[R]]
350+
;
351+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
352+
%s = load i64, ptr %gep, align 1
353+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
354+
ret <2 x i64> %r
355+
}
356+
357+
; Scalar i32 load through a GEP to element 5 of <8 x i16> (byte offset 10 from
; %p, with 2-byte i16 elements), inserted into lane 0 of an undef <4 x i32>.
; The 4-byte load covers bytes 10..13, inside the dereferenceable(16) range,
; but offset 10 is not a multiple of the 4-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <4 x i32> @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
358+
; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
359+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
360+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
361+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
362+
; CHECK-NEXT: ret <4 x i32> [[R]]
363+
;
364+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 5
365+
%s = load i32, ptr %gep, align 1
366+
%r = insertelement <4 x i32> undef, i32 %s, i64 0
367+
ret <4 x i32> %r
368+
}
369+
370+
; Scalar i64 load through a GEP to element 1 of <4 x i32> (byte offset 4 from
; %p, with 4-byte i32 elements), inserted into lane 0 of an undef <2 x i64>.
; Offset 4 is not a multiple of the 8-byte loaded element.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
; NOTE(review): the function name says "load_i32" but the body loads i64 --
; confirm whether the name or the load type is the intended one.
define <2 x i64> @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
371+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(
372+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 1
373+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
374+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
375+
; CHECK-NEXT: ret <2 x i64> [[R]]
376+
;
377+
%gep = getelementptr inbounds <4 x i32>, ptr %p, i64 0, i64 1
378+
%s = load i64, ptr %gep, align 1
379+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
380+
ret <2 x i64> %r
381+
}
382+
305383
define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
306384
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
307385
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
@@ -331,6 +409,58 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
331409
ret <4 x i32> %r
332410
}
333411

412+
; Scalar i32 load through a GEP to element 7 of <8 x i16> (byte offset 14 from
; %p, with 2-byte i16 elements), inserted into lane 0 of an undef <4 x i32>.
; The 4-byte load covers bytes 14..17, which runs past the 16 bytes declared
; dereferenceable on %p.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <4 x i32> @gep07_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
413+
; CHECK-LABEL: @gep07_bitcast_load_i32_from_v8i16_insert_v4i32(
414+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 7
415+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
416+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
417+
; CHECK-NEXT: ret <4 x i32> [[R]]
418+
;
419+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 7
420+
%s = load i32, ptr %gep, align 1
421+
%r = insertelement <4 x i32> undef, i32 %s, i64 0
422+
ret <4 x i32> %r
423+
}
424+
425+
; Scalar i64 load through a GEP to element 3 of <4 x i32> (byte offset 12 from
; %p, with 4-byte i32 elements), inserted into lane 0 of an undef <2 x i64>.
; The 8-byte load covers bytes 12..19, which runs past the 16 bytes declared
; dereferenceable on %p.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
; NOTE(review): the function name says "load_i32" but the body loads i64 --
; confirm whether the name or the load type is the intended one.
define <2 x i64> @gep03_bitcast_load_i32_from_v4i32_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
426+
; CHECK-LABEL: @gep03_bitcast_load_i32_from_v4i32_insert_v2i64(
427+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 3
428+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
429+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
430+
; CHECK-NEXT: ret <2 x i64> [[R]]
431+
;
432+
%gep = getelementptr inbounds <4 x i32>, ptr %p, i64 0, i64 3
433+
%s = load i64, ptr %gep, align 1
434+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
435+
ret <2 x i64> %r
436+
}
437+
438+
; Scalar i64 load through a GEP to element 9 of <16 x i8> (byte offset 9 from
; %p), inserted into lane 0 of an undef <2 x i64>. The 8-byte load covers
; bytes 9..16, one byte past the 16 bytes declared dereferenceable on %p.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
; NOTE(review): this function references attribute group #0, whose definition
; is not visible in this hunk -- confirm the file defines it and that the
; sibling tests were meant to omit it.
define <2 x i64> @gep09_bitcast_load_i64_from_v16i8_insert_v2i64(ptr align 1 dereferenceable(16) %p) #0 {
439+
; CHECK-LABEL: @gep09_bitcast_load_i64_from_v16i8_insert_v2i64(
440+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 9
441+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
442+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
443+
; CHECK-NEXT: ret <2 x i64> [[R]]
444+
;
445+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 9
446+
%s = load i64, ptr %gep, align 1
447+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
448+
ret <2 x i64> %r
449+
}
450+
451+
; Scalar i64 load through a GEP to element 5 of <8 x i16> (byte offset 10 from
; %p, with 2-byte i16 elements), inserted into lane 0 of an undef <2 x i64>.
; The 8-byte load covers bytes 10..17, which runs past the 16 bytes declared
; dereferenceable on %p.
; The CHECK lines match the input instructions one-for-one, so the expected
; output is the unchanged gep/load/insertelement sequence.
define <2 x i64> @gep05_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
452+
; CHECK-LABEL: @gep05_bitcast_load_i64_from_v8i16_insert_v2i64(
453+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
454+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
455+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
456+
; CHECK-NEXT: ret <2 x i64> [[R]]
457+
;
458+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 5
459+
%s = load i64, ptr %gep, align 1
460+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
461+
ret <2 x i64> %r
462+
}
463+
334464
; If there are enough dereferenceable bytes, we can offset the vector load.
335465

336466
define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync {

0 commit comments

Comments
 (0)