Skip to content

Commit c2ba2b2

Browse files
authored
Fix ISel crash when lowering BUILD_VECTOR (#73186)
The 512-bit vpbroadcastw is available only with AVX512BW. Avoid lowering BUILD_VECTOR into a VBROADCAST node when that condition is not met. This fixes a crash (see the newly added test).
1 parent f943646 commit c2ba2b2

File tree

2 files changed

+349
-6
lines changed

2 files changed

+349
-6
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,10 @@ static bool isEndbrImm64(uint64_t Imm) {
881881
return false;
882882
}
883883

884+
/// Returns true if \p VT is one of the 512-bit vector types with 8- or 16-bit
/// elements (v32i16, v32f16 or v64i8). Callers pair this with
/// !Subtarget->hasBWI() to decide whether a broadcast of this type has to be
/// emulated with two half-width broadcasts.
static bool needBWI(MVT VT) {
885+
return (VT == MVT::v32i16 || VT == MVT::v32f16 || VT == MVT::v64i8);
886+
}
887+
884888
void X86DAGToDAGISel::PreprocessISelDAG() {
885889
bool MadeChange = false;
886890
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
@@ -986,15 +990,15 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
986990
case X86ISD::VBROADCAST: {
987991
MVT VT = N->getSimpleValueType(0);
988992
// Emulate v32i16/v32f16/v64i8 broadcast without BWI.
989-
if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
990-
MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
993+
if (!Subtarget->hasBWI() && needBWI(VT)) {
994+
MVT NarrowVT = VT.getHalfNumVectorElementsVT();
991995
SDLoc dl(N);
992996
SDValue NarrowBCast =
993997
CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0));
994998
SDValue Res =
995999
CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
9961000
NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
997-
unsigned Index = VT == MVT::v32i16 ? 16 : 32;
1001+
unsigned Index = NarrowVT.getVectorMinNumElements();
9981002
Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
9991003
CurDAG->getIntPtrConstant(Index, dl));
10001004

@@ -1010,8 +1014,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
10101014
case X86ISD::VBROADCAST_LOAD: {
10111015
MVT VT = N->getSimpleValueType(0);
10121016
// Emulate v32i16/v32f16/v64i8 broadcast without BWI.
1013-
if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
1014-
MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
1017+
if (!Subtarget->hasBWI() && needBWI(VT)) {
1018+
MVT NarrowVT = VT.getHalfNumVectorElementsVT();
10151019
auto *MemNode = cast<MemSDNode>(N);
10161020
SDLoc dl(N);
10171021
SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
@@ -1022,7 +1026,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
10221026
SDValue Res =
10231027
CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
10241028
NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1025-
unsigned Index = VT == MVT::v32i16 ? 16 : 32;
1029+
unsigned Index = NarrowVT.getVectorMinNumElements();
10261030
Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
10271031
CurDAG->getIntPtrConstant(Index, dl));
10281032

llvm/test/CodeGen/X86/shuffle-half.ll

Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,4 +308,343 @@ define <32 x half> @dump_vec() {
308308
ret <32 x half> %1
309309
}
310310

311+
define <32 x half> @build_vec(ptr %p, <32 x i1> %mask) {
312+
; CHECK-LABEL: build_vec:
313+
; CHECK: # %bb.0:
314+
; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
315+
; CHECK-NEXT: vpmovmskb %ymm0, %eax
316+
; CHECK-NEXT: testb $1, %al
317+
; CHECK-NEXT: je .LBB1_1
318+
; CHECK-NEXT: # %bb.2: # %cond.load
319+
; CHECK-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
320+
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
321+
; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
322+
; CHECK-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
323+
; CHECK-NEXT: testb $2, %al
324+
; CHECK-NEXT: jne .LBB1_4
325+
; CHECK-NEXT: jmp .LBB1_5
326+
; CHECK-NEXT: .LBB1_1:
327+
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
328+
; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
329+
; CHECK-NEXT: testb $2, %al
330+
; CHECK-NEXT: je .LBB1_5
331+
; CHECK-NEXT: .LBB1_4: # %cond.load1
332+
; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm1
333+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
334+
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
335+
; CHECK-NEXT: .LBB1_5: # %else2
336+
; CHECK-NEXT: testb $4, %al
337+
; CHECK-NEXT: jne .LBB1_6
338+
; CHECK-NEXT: # %bb.7: # %else5
339+
; CHECK-NEXT: testb $8, %al
340+
; CHECK-NEXT: jne .LBB1_8
341+
; CHECK-NEXT: .LBB1_9: # %else8
342+
; CHECK-NEXT: testb $16, %al
343+
; CHECK-NEXT: jne .LBB1_10
344+
; CHECK-NEXT: .LBB1_11: # %else11
345+
; CHECK-NEXT: testb $32, %al
346+
; CHECK-NEXT: jne .LBB1_12
347+
; CHECK-NEXT: .LBB1_13: # %else14
348+
; CHECK-NEXT: testb $64, %al
349+
; CHECK-NEXT: jne .LBB1_14
350+
; CHECK-NEXT: .LBB1_15: # %else17
351+
; CHECK-NEXT: testb %al, %al
352+
; CHECK-NEXT: js .LBB1_16
353+
; CHECK-NEXT: .LBB1_17: # %else20
354+
; CHECK-NEXT: testl $256, %eax # imm = 0x100
355+
; CHECK-NEXT: jne .LBB1_18
356+
; CHECK-NEXT: .LBB1_19: # %else23
357+
; CHECK-NEXT: testl $512, %eax # imm = 0x200
358+
; CHECK-NEXT: jne .LBB1_20
359+
; CHECK-NEXT: .LBB1_21: # %else26
360+
; CHECK-NEXT: testl $1024, %eax # imm = 0x400
361+
; CHECK-NEXT: jne .LBB1_22
362+
; CHECK-NEXT: .LBB1_23: # %else29
363+
; CHECK-NEXT: testl $2048, %eax # imm = 0x800
364+
; CHECK-NEXT: jne .LBB1_24
365+
; CHECK-NEXT: .LBB1_25: # %else32
366+
; CHECK-NEXT: testl $4096, %eax # imm = 0x1000
367+
; CHECK-NEXT: jne .LBB1_26
368+
; CHECK-NEXT: .LBB1_27: # %else35
369+
; CHECK-NEXT: testl $8192, %eax # imm = 0x2000
370+
; CHECK-NEXT: jne .LBB1_28
371+
; CHECK-NEXT: .LBB1_29: # %else38
372+
; CHECK-NEXT: testl $16384, %eax # imm = 0x4000
373+
; CHECK-NEXT: jne .LBB1_30
374+
; CHECK-NEXT: .LBB1_31: # %else41
375+
; CHECK-NEXT: testw %ax, %ax
376+
; CHECK-NEXT: js .LBB1_32
377+
; CHECK-NEXT: .LBB1_33: # %else44
378+
; CHECK-NEXT: testl $65536, %eax # imm = 0x10000
379+
; CHECK-NEXT: jne .LBB1_34
380+
; CHECK-NEXT: .LBB1_35: # %else47
381+
; CHECK-NEXT: testl $131072, %eax # imm = 0x20000
382+
; CHECK-NEXT: jne .LBB1_36
383+
; CHECK-NEXT: .LBB1_37: # %else50
384+
; CHECK-NEXT: testl $262144, %eax # imm = 0x40000
385+
; CHECK-NEXT: jne .LBB1_38
386+
; CHECK-NEXT: .LBB1_39: # %else53
387+
; CHECK-NEXT: testl $524288, %eax # imm = 0x80000
388+
; CHECK-NEXT: jne .LBB1_40
389+
; CHECK-NEXT: .LBB1_41: # %else56
390+
; CHECK-NEXT: testl $1048576, %eax # imm = 0x100000
391+
; CHECK-NEXT: jne .LBB1_42
392+
; CHECK-NEXT: .LBB1_43: # %else59
393+
; CHECK-NEXT: testl $2097152, %eax # imm = 0x200000
394+
; CHECK-NEXT: jne .LBB1_44
395+
; CHECK-NEXT: .LBB1_45: # %else62
396+
; CHECK-NEXT: testl $4194304, %eax # imm = 0x400000
397+
; CHECK-NEXT: jne .LBB1_46
398+
; CHECK-NEXT: .LBB1_47: # %else65
399+
; CHECK-NEXT: testl $8388608, %eax # imm = 0x800000
400+
; CHECK-NEXT: jne .LBB1_48
401+
; CHECK-NEXT: .LBB1_49: # %else68
402+
; CHECK-NEXT: testl $16777216, %eax # imm = 0x1000000
403+
; CHECK-NEXT: jne .LBB1_50
404+
; CHECK-NEXT: .LBB1_51: # %else71
405+
; CHECK-NEXT: testl $33554432, %eax # imm = 0x2000000
406+
; CHECK-NEXT: jne .LBB1_52
407+
; CHECK-NEXT: .LBB1_53: # %else74
408+
; CHECK-NEXT: testl $67108864, %eax # imm = 0x4000000
409+
; CHECK-NEXT: jne .LBB1_54
410+
; CHECK-NEXT: .LBB1_55: # %else77
411+
; CHECK-NEXT: testl $134217728, %eax # imm = 0x8000000
412+
; CHECK-NEXT: jne .LBB1_56
413+
; CHECK-NEXT: .LBB1_57: # %else80
414+
; CHECK-NEXT: testl $268435456, %eax # imm = 0x10000000
415+
; CHECK-NEXT: jne .LBB1_58
416+
; CHECK-NEXT: .LBB1_59: # %else83
417+
; CHECK-NEXT: testl $536870912, %eax # imm = 0x20000000
418+
; CHECK-NEXT: jne .LBB1_60
419+
; CHECK-NEXT: .LBB1_61: # %else86
420+
; CHECK-NEXT: testl $1073741824, %eax # imm = 0x40000000
421+
; CHECK-NEXT: jne .LBB1_62
422+
; CHECK-NEXT: .LBB1_63: # %else89
423+
; CHECK-NEXT: testl $-2147483648, %eax # imm = 0x80000000
424+
; CHECK-NEXT: jne .LBB1_64
425+
; CHECK-NEXT: .LBB1_65: # %else92
426+
; CHECK-NEXT: retq
427+
; CHECK-NEXT: .LBB1_6: # %cond.load4
428+
; CHECK-NEXT: vpbroadcastw 4(%rdi), %xmm1
429+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
430+
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
431+
; CHECK-NEXT: testb $8, %al
432+
; CHECK-NEXT: je .LBB1_9
433+
; CHECK-NEXT: .LBB1_8: # %cond.load7
434+
; CHECK-NEXT: vpbroadcastw 6(%rdi), %xmm1
435+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
436+
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
437+
; CHECK-NEXT: testb $16, %al
438+
; CHECK-NEXT: je .LBB1_11
439+
; CHECK-NEXT: .LBB1_10: # %cond.load10
440+
; CHECK-NEXT: vpbroadcastw 8(%rdi), %xmm1
441+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
442+
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
443+
; CHECK-NEXT: testb $32, %al
444+
; CHECK-NEXT: je .LBB1_13
445+
; CHECK-NEXT: .LBB1_12: # %cond.load13
446+
; CHECK-NEXT: vpbroadcastw 10(%rdi), %xmm1
447+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
448+
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
449+
; CHECK-NEXT: testb $64, %al
450+
; CHECK-NEXT: je .LBB1_15
451+
; CHECK-NEXT: .LBB1_14: # %cond.load16
452+
; CHECK-NEXT: vpbroadcastw 12(%rdi), %xmm1
453+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
454+
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
455+
; CHECK-NEXT: testb %al, %al
456+
; CHECK-NEXT: jns .LBB1_17
457+
; CHECK-NEXT: .LBB1_16: # %cond.load19
458+
; CHECK-NEXT: vpbroadcastw 14(%rdi), %xmm1
459+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
460+
; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
461+
; CHECK-NEXT: testl $256, %eax # imm = 0x100
462+
; CHECK-NEXT: je .LBB1_19
463+
; CHECK-NEXT: .LBB1_18: # %cond.load22
464+
; CHECK-NEXT: vpbroadcastw 16(%rdi), %ymm1
465+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
466+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
467+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
468+
; CHECK-NEXT: testl $512, %eax # imm = 0x200
469+
; CHECK-NEXT: je .LBB1_21
470+
; CHECK-NEXT: .LBB1_20: # %cond.load25
471+
; CHECK-NEXT: vpbroadcastw 18(%rdi), %ymm1
472+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
473+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
474+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
475+
; CHECK-NEXT: testl $1024, %eax # imm = 0x400
476+
; CHECK-NEXT: je .LBB1_23
477+
; CHECK-NEXT: .LBB1_22: # %cond.load28
478+
; CHECK-NEXT: vpbroadcastw 20(%rdi), %ymm1
479+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
480+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
481+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
482+
; CHECK-NEXT: testl $2048, %eax # imm = 0x800
483+
; CHECK-NEXT: je .LBB1_25
484+
; CHECK-NEXT: .LBB1_24: # %cond.load31
485+
; CHECK-NEXT: vpbroadcastw 22(%rdi), %ymm1
486+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
487+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
488+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
489+
; CHECK-NEXT: testl $4096, %eax # imm = 0x1000
490+
; CHECK-NEXT: je .LBB1_27
491+
; CHECK-NEXT: .LBB1_26: # %cond.load34
492+
; CHECK-NEXT: vpbroadcastw 24(%rdi), %ymm1
493+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
494+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
495+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
496+
; CHECK-NEXT: testl $8192, %eax # imm = 0x2000
497+
; CHECK-NEXT: je .LBB1_29
498+
; CHECK-NEXT: .LBB1_28: # %cond.load37
499+
; CHECK-NEXT: vpbroadcastw 26(%rdi), %ymm1
500+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
501+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
502+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
503+
; CHECK-NEXT: testl $16384, %eax # imm = 0x4000
504+
; CHECK-NEXT: je .LBB1_31
505+
; CHECK-NEXT: .LBB1_30: # %cond.load40
506+
; CHECK-NEXT: vpbroadcastw 28(%rdi), %ymm1
507+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6],ymm0[7,8,9,10,11,12,13],ymm1[14],ymm0[15]
508+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
509+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
510+
; CHECK-NEXT: testw %ax, %ax
511+
; CHECK-NEXT: jns .LBB1_33
512+
; CHECK-NEXT: .LBB1_32: # %cond.load43
513+
; CHECK-NEXT: vpbroadcastw 30(%rdi), %ymm1
514+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
515+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
516+
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
517+
; CHECK-NEXT: testl $65536, %eax # imm = 0x10000
518+
; CHECK-NEXT: je .LBB1_35
519+
; CHECK-NEXT: .LBB1_34: # %cond.load46
520+
; CHECK-NEXT: vpbroadcastw 32(%rdi), %xmm1
521+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
522+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
523+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
524+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
525+
; CHECK-NEXT: testl $131072, %eax # imm = 0x20000
526+
; CHECK-NEXT: je .LBB1_37
527+
; CHECK-NEXT: .LBB1_36: # %cond.load49
528+
; CHECK-NEXT: vpbroadcastw 34(%rdi), %xmm1
529+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
530+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2,3,4,5,6,7]
531+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
532+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
533+
; CHECK-NEXT: testl $262144, %eax # imm = 0x40000
534+
; CHECK-NEXT: je .LBB1_39
535+
; CHECK-NEXT: .LBB1_38: # %cond.load52
536+
; CHECK-NEXT: vpbroadcastw 36(%rdi), %xmm1
537+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
538+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2],xmm2[3,4,5,6,7]
539+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
540+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
541+
; CHECK-NEXT: testl $524288, %eax # imm = 0x80000
542+
; CHECK-NEXT: je .LBB1_41
543+
; CHECK-NEXT: .LBB1_40: # %cond.load55
544+
; CHECK-NEXT: vpbroadcastw 38(%rdi), %xmm1
545+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
546+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3],xmm2[4,5,6,7]
547+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
548+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
549+
; CHECK-NEXT: testl $1048576, %eax # imm = 0x100000
550+
; CHECK-NEXT: je .LBB1_43
551+
; CHECK-NEXT: .LBB1_42: # %cond.load58
552+
; CHECK-NEXT: vpbroadcastw 40(%rdi), %xmm1
553+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
554+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4],xmm2[5,6,7]
555+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
556+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
557+
; CHECK-NEXT: testl $2097152, %eax # imm = 0x200000
558+
; CHECK-NEXT: je .LBB1_45
559+
; CHECK-NEXT: .LBB1_44: # %cond.load61
560+
; CHECK-NEXT: vpbroadcastw 42(%rdi), %xmm1
561+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
562+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4],xmm1[5],xmm2[6,7]
563+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
564+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
565+
; CHECK-NEXT: testl $4194304, %eax # imm = 0x400000
566+
; CHECK-NEXT: je .LBB1_47
567+
; CHECK-NEXT: .LBB1_46: # %cond.load64
568+
; CHECK-NEXT: vpbroadcastw 44(%rdi), %xmm1
569+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
570+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7]
571+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
572+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
573+
; CHECK-NEXT: testl $8388608, %eax # imm = 0x800000
574+
; CHECK-NEXT: je .LBB1_49
575+
; CHECK-NEXT: .LBB1_48: # %cond.load67
576+
; CHECK-NEXT: vpbroadcastw 46(%rdi), %xmm1
577+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
578+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
579+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
580+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
581+
; CHECK-NEXT: testl $16777216, %eax # imm = 0x1000000
582+
; CHECK-NEXT: je .LBB1_51
583+
; CHECK-NEXT: .LBB1_50: # %cond.load70
584+
; CHECK-NEXT: vpbroadcastw 48(%rdi), %ymm1
585+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
586+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
587+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
588+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
589+
; CHECK-NEXT: testl $33554432, %eax # imm = 0x2000000
590+
; CHECK-NEXT: je .LBB1_53
591+
; CHECK-NEXT: .LBB1_52: # %cond.load73
592+
; CHECK-NEXT: vpbroadcastw 50(%rdi), %ymm1
593+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
594+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3,4,5,6,7,8],ymm1[9],ymm2[10,11,12,13,14,15]
595+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
596+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
597+
; CHECK-NEXT: testl $67108864, %eax # imm = 0x4000000
598+
; CHECK-NEXT: je .LBB1_55
599+
; CHECK-NEXT: .LBB1_54: # %cond.load76
600+
; CHECK-NEXT: vpbroadcastw 52(%rdi), %ymm1
601+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
602+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7,8,9],ymm1[10],ymm2[11,12,13,14,15]
603+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
604+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
605+
; CHECK-NEXT: testl $134217728, %eax # imm = 0x8000000
606+
; CHECK-NEXT: je .LBB1_57
607+
; CHECK-NEXT: .LBB1_56: # %cond.load79
608+
; CHECK-NEXT: vpbroadcastw 54(%rdi), %ymm1
609+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
610+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6,7,8,9,10],ymm1[11],ymm2[12,13,14,15]
611+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
612+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
613+
; CHECK-NEXT: testl $268435456, %eax # imm = 0x10000000
614+
; CHECK-NEXT: je .LBB1_59
615+
; CHECK-NEXT: .LBB1_58: # %cond.load82
616+
; CHECK-NEXT: vpbroadcastw 56(%rdi), %ymm1
617+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
618+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4],ymm2[5,6,7,8,9,10,11],ymm1[12],ymm2[13,14,15]
619+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
620+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
621+
; CHECK-NEXT: testl $536870912, %eax # imm = 0x20000000
622+
; CHECK-NEXT: je .LBB1_61
623+
; CHECK-NEXT: .LBB1_60: # %cond.load85
624+
; CHECK-NEXT: vpbroadcastw 58(%rdi), %ymm1
625+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
626+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4],ymm1[5],ymm2[6,7,8,9,10,11,12],ymm1[13],ymm2[14,15]
627+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
628+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
629+
; CHECK-NEXT: testl $1073741824, %eax # imm = 0x40000000
630+
; CHECK-NEXT: je .LBB1_63
631+
; CHECK-NEXT: .LBB1_62: # %cond.load88
632+
; CHECK-NEXT: vpbroadcastw 60(%rdi), %ymm1
633+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
634+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5],ymm1[6],ymm2[7,8,9,10,11,12,13],ymm1[14],ymm2[15]
635+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
636+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
637+
; CHECK-NEXT: testl $-2147483648, %eax # imm = 0x80000000
638+
; CHECK-NEXT: je .LBB1_65
639+
; CHECK-NEXT: .LBB1_64: # %cond.load91
640+
; CHECK-NEXT: vpbroadcastw 62(%rdi), %ymm1
641+
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
642+
; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7],ymm2[8,9,10,11,12,13,14],ymm1[15]
643+
; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
644+
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
645+
; CHECK-NEXT: retq
646+
%1 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr %p, i32 2, <32 x i1 > %mask, <32 x half> <half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0, half 2.0>)
647+
ret <32 x half> %1
648+
}
649+
311650
declare <32 x half> @llvm.masked.load.v32f16.p0(ptr, i32, <32 x i1>, <32 x half>)

0 commit comments

Comments
 (0)