Skip to content

Commit 5b0167c

Browse files
committed
add test-cases
1 parent 50cdf6c commit 5b0167c

File tree

2 files changed

+212
-4
lines changed

2 files changed

+212
-4
lines changed

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
294294
; must be a multiple of element size.
295295
; TODO: Could bitcast around this limitation.
296296

297-
define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(16) %p) nofree nosync {
298-
; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
297+
define <4 x i32> @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
298+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
299299
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
300300
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
301301
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
@@ -307,6 +307,84 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
307307
ret <4 x i32> %r
308308
}
309309

310+
; Loads an i64 starting at element 1 of a <16 x i8> view of %p (byte offset 1,
; align 1) and inserts it into lane 0 of a poison <2 x i64>.
; The loaded bytes 1..8 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
311+
; CHECK-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
312+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
313+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
314+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
315+
; CHECK-NEXT: ret <2 x i64> [[R]]
316+
;
317+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 1
318+
%s = load i64, ptr %gep, align 1
319+
%r = insertelement <2 x i64> poison, i64 %s, i64 0
320+
ret <2 x i64> %r
321+
}
322+
323+
; Loads an i32 starting at element 11 of a <16 x i8> view of %p (byte offset 11,
; align 1) and inserts it into lane 0 of a poison <4 x i32>.
; The loaded bytes 11..14 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <4 x i32> @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
324+
; CHECK-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
325+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
326+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
327+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
328+
; CHECK-NEXT: ret <4 x i32> [[R]]
329+
;
330+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 11
331+
%s = load i32, ptr %gep, align 1
332+
%r = insertelement <4 x i32> poison, i32 %s, i64 0
333+
ret <4 x i32> %r
334+
}
335+
336+
; Loads an i32 starting at element 1 of an <8 x i16> view of %p (byte offset 2,
; align 1) and inserts it into lane 0 of a poison <4 x i32>.
; The loaded bytes 2..5 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <4 x i32> @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
337+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
338+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
339+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
340+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
341+
; CHECK-NEXT: ret <4 x i32> [[R]]
342+
;
343+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
344+
%s = load i32, ptr %gep, align 1
345+
%r = insertelement <4 x i32> poison, i32 %s, i64 0
346+
ret <4 x i32> %r
347+
}
348+
349+
; Loads an i64 starting at element 1 of an <8 x i16> view of %p (byte offset 2,
; align 1) and inserts it into lane 0 of a poison <2 x i64>.
; The loaded bytes 2..9 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
350+
; CHECK-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
351+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
352+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
353+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
354+
; CHECK-NEXT: ret <2 x i64> [[R]]
355+
;
356+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
357+
%s = load i64, ptr %gep, align 1
358+
%r = insertelement <2 x i64> poison, i64 %s, i64 0
359+
ret <2 x i64> %r
360+
}
361+
362+
; Loads an i32 starting at element 5 of an <8 x i16> view of %p (byte offset 10,
; align 1) and inserts it into lane 0 of a poison <4 x i32>.
; The loaded bytes 10..13 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <4 x i32> @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
363+
; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
364+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
365+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
366+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
367+
; CHECK-NEXT: ret <4 x i32> [[R]]
368+
;
369+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 5
370+
%s = load i32, ptr %gep, align 1
371+
%r = insertelement <4 x i32> poison, i32 %s, i64 0
372+
ret <4 x i32> %r
373+
}
374+
375+
; Loads an i64 starting at element 1 of a <4 x i32> view of %p (byte offset 4,
; align 1) and inserts it into lane 0 of a poison <2 x i64>.
; The loaded bytes 4..11 lie inside the 16 bytes marked dereferenceable.
; NOTE(review): the function name says "load_i32" but the load below is i64 -
; confirm whether the name or the load type is the intended one.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(ptr align 1 dereferenceable(16) %p) nofree nosync {
376+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(
377+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 1
378+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
379+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
380+
; CHECK-NEXT: ret <2 x i64> [[R]]
381+
;
382+
%gep = getelementptr inbounds <4 x i32>, ptr %p, i64 0, i64 1
383+
%s = load i64, ptr %gep, align 1
384+
%r = insertelement <2 x i64> poison, i64 %s, i64 0
385+
ret <2 x i64> %r
386+
}
387+
310388
define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
311389
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
312390
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1

llvm/test/Transforms/VectorCombine/X86/load.ll

Lines changed: 132 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,8 +279,8 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
279279
; must be a multiple of element size.
280280
; TODO: Could bitcast around this limitation.
281281

282-
define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
283-
; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
282+
define <4 x i32> @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
283+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
284284
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
285285
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
286286
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
@@ -292,6 +292,84 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl
292292
ret <4 x i32> %r
293293
}
294294

295+
; Loads an i64 starting at element 1 of a <16 x i8> view of %p (byte offset 1,
; align 1) and inserts it into lane 0 of an undef <2 x i64>.
; The loaded bytes 1..8 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
296+
; CHECK-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
297+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
298+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
299+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
300+
; CHECK-NEXT: ret <2 x i64> [[R]]
301+
;
302+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 1
303+
%s = load i64, ptr %gep, align 1
304+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
305+
ret <2 x i64> %r
306+
}
307+
308+
; Loads an i32 starting at element 11 of a <16 x i8> view of %p (byte offset 11,
; align 1) and inserts it into lane 0 of an undef <4 x i32>.
; The loaded bytes 11..14 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <4 x i32> @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
309+
; CHECK-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
310+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
311+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
312+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
313+
; CHECK-NEXT: ret <4 x i32> [[R]]
314+
;
315+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 11
316+
%s = load i32, ptr %gep, align 1
317+
%r = insertelement <4 x i32> undef, i32 %s, i64 0
318+
ret <4 x i32> %r
319+
}
320+
321+
; Loads an i32 starting at element 1 of an <8 x i16> view of %p (byte offset 2,
; align 1) and inserts it into lane 0 of an undef <4 x i32>.
; The loaded bytes 2..5 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <4 x i32> @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
322+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
323+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
324+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
325+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
326+
; CHECK-NEXT: ret <4 x i32> [[R]]
327+
;
328+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
329+
%s = load i32, ptr %gep, align 1
330+
%r = insertelement <4 x i32> undef, i32 %s, i64 0
331+
ret <4 x i32> %r
332+
}
333+
334+
; Loads an i64 starting at element 1 of an <8 x i16> view of %p (byte offset 2,
; align 1) and inserts it into lane 0 of an undef <2 x i64>.
; The loaded bytes 2..9 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
335+
; CHECK-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
336+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
337+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
338+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
339+
; CHECK-NEXT: ret <2 x i64> [[R]]
340+
;
341+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1
342+
%s = load i64, ptr %gep, align 1
343+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
344+
ret <2 x i64> %r
345+
}
346+
347+
; Loads an i32 starting at element 5 of an <8 x i16> view of %p (byte offset 10,
; align 1) and inserts it into lane 0 of an undef <4 x i32>.
; The loaded bytes 10..13 lie inside the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <4 x i32> @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
348+
; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
349+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
350+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
351+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
352+
; CHECK-NEXT: ret <4 x i32> [[R]]
353+
;
354+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 5
355+
%s = load i32, ptr %gep, align 1
356+
%r = insertelement <4 x i32> undef, i32 %s, i64 0
357+
ret <4 x i32> %r
358+
}
359+
360+
; Loads an i64 starting at element 1 of a <4 x i32> view of %p (byte offset 4,
; align 1) and inserts it into lane 0 of an undef <2 x i64>.
; The loaded bytes 4..11 lie inside the 16 bytes marked dereferenceable.
; NOTE(review): the function name says "load_i32" but the load below is i64 -
; confirm whether the name or the load type is the intended one.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
361+
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(
362+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 1
363+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
364+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
365+
; CHECK-NEXT: ret <2 x i64> [[R]]
366+
;
367+
%gep = getelementptr inbounds <4 x i32>, ptr %p, i64 0, i64 1
368+
%s = load i64, ptr %gep, align 1
369+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
370+
ret <2 x i64> %r
371+
}
372+
295373
define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync {
296374
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
297375
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 12
@@ -322,6 +400,58 @@ define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceab
322400
ret <4 x i32> %r
323401
}
324402

403+
; Loads an i32 starting at element 7 of an <8 x i16> view of %p (byte offset 14,
; align 1) and inserts it into lane 0 of an undef <4 x i32>.
; The loaded bytes 14..17 extend past the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <4 x i32> @gep07_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 dereferenceable(16) %p) {
404+
; CHECK-LABEL: @gep07_bitcast_load_i32_from_v8i16_insert_v4i32(
405+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 7
406+
; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
407+
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
408+
; CHECK-NEXT: ret <4 x i32> [[R]]
409+
;
410+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 7
411+
%s = load i32, ptr %gep, align 1
412+
%r = insertelement <4 x i32> undef, i32 %s, i64 0
413+
ret <4 x i32> %r
414+
}
415+
416+
; Loads an i64 starting at element 3 of a <4 x i32> view of %p (byte offset 12,
; align 1) and inserts it into lane 0 of an undef <2 x i64>.
; The loaded bytes 12..19 extend past the 16 bytes marked dereferenceable.
; NOTE(review): the function name says "load_i32" but the load below is i64 -
; confirm whether the name or the load type is the intended one.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep03_bitcast_load_i32_from_v4i32_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
417+
; CHECK-LABEL: @gep03_bitcast_load_i32_from_v4i32_insert_v2i64(
418+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 3
419+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
420+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
421+
; CHECK-NEXT: ret <2 x i64> [[R]]
422+
;
423+
%gep = getelementptr inbounds <4 x i32>, ptr %p, i64 0, i64 3
424+
%s = load i64, ptr %gep, align 1
425+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
426+
ret <2 x i64> %r
427+
}
428+
429+
; Loads an i64 starting at element 9 of a <16 x i8> view of %p (byte offset 9,
; align 1) and inserts it into lane 0 of an undef <2 x i64>.
; The loaded bytes 9..16 extend one byte past the 16 bytes marked dereferenceable.
; NOTE(review): "#0" refers to an attribute group that is not defined in the
; visible part of this file - confirm it exists and is the intended one.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep09_bitcast_load_i64_from_v16i8_insert_v2i64(ptr align 1 dereferenceable(16) %p) #0 {
430+
; CHECK-LABEL: @gep09_bitcast_load_i64_from_v16i8_insert_v2i64(
431+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 9
432+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
433+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
434+
; CHECK-NEXT: ret <2 x i64> [[R]]
435+
;
436+
%gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 9
437+
%s = load i64, ptr %gep, align 1
438+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
439+
ret <2 x i64> %r
440+
}
441+
442+
; Loads an i64 starting at element 5 of an <8 x i16> view of %p (byte offset 10,
; align 1) and inserts it into lane 0 of an undef <2 x i64>.
; The loaded bytes 10..17 extend past the 16 bytes marked dereferenceable.
; The CHECK lines mirror the input IR, i.e. the output is expected to be unchanged.
define <2 x i64> @gep05_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 dereferenceable(16) %p) {
443+
; CHECK-LABEL: @gep05_bitcast_load_i64_from_v8i16_insert_v2i64(
444+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
445+
; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
446+
; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> undef, i64 [[S]], i64 0
447+
; CHECK-NEXT: ret <2 x i64> [[R]]
448+
;
449+
%gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 5
450+
%s = load i64, ptr %gep, align 1
451+
%r = insertelement <2 x i64> undef, i64 %s, i64 0
452+
ret <2 x i64> %r
453+
}
454+
325455
; If there are enough dereferenceable bytes, we can offset the vector load.
326456

327457
define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) %p) nofree nosync {

0 commit comments

Comments
 (0)