@@ -308,4 +308,343 @@ define <32 x half> @dump_vec() {
308
308
ret <32 x half > %1
309
309
}
310
310
311
+ define <32 x half > @build_vec (ptr %p , <32 x i1 > %mask ) {
312
+ ; CHECK-LABEL: build_vec:
313
+ ; CHECK: # %bb.0:
314
+ ; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
315
+ ; CHECK-NEXT: vpmovmskb %ymm0, %eax
316
+ ; CHECK-NEXT: testb $1, %al
317
+ ; CHECK-NEXT: je .LBB1_1
318
+ ; CHECK-NEXT: # %bb.2: # %cond.load
319
+ ; CHECK-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
320
+ ; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
321
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
322
+ ; CHECK-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
323
+ ; CHECK-NEXT: testb $2, %al
324
+ ; CHECK-NEXT: jne .LBB1_4
325
+ ; CHECK-NEXT: jmp .LBB1_5
326
+ ; CHECK-NEXT: .LBB1_1:
327
+ ; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm0 = [2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
328
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
329
+ ; CHECK-NEXT: testb $2, %al
330
+ ; CHECK-NEXT: je .LBB1_5
331
+ ; CHECK-NEXT: .LBB1_4: # %cond.load1
332
+ ; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm1
333
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
334
+ ; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
335
+ ; CHECK-NEXT: .LBB1_5: # %else2
336
+ ; CHECK-NEXT: testb $4, %al
337
+ ; CHECK-NEXT: jne .LBB1_6
338
+ ; CHECK-NEXT: # %bb.7: # %else5
339
+ ; CHECK-NEXT: testb $8, %al
340
+ ; CHECK-NEXT: jne .LBB1_8
341
+ ; CHECK-NEXT: .LBB1_9: # %else8
342
+ ; CHECK-NEXT: testb $16, %al
343
+ ; CHECK-NEXT: jne .LBB1_10
344
+ ; CHECK-NEXT: .LBB1_11: # %else11
345
+ ; CHECK-NEXT: testb $32, %al
346
+ ; CHECK-NEXT: jne .LBB1_12
347
+ ; CHECK-NEXT: .LBB1_13: # %else14
348
+ ; CHECK-NEXT: testb $64, %al
349
+ ; CHECK-NEXT: jne .LBB1_14
350
+ ; CHECK-NEXT: .LBB1_15: # %else17
351
+ ; CHECK-NEXT: testb %al, %al
352
+ ; CHECK-NEXT: js .LBB1_16
353
+ ; CHECK-NEXT: .LBB1_17: # %else20
354
+ ; CHECK-NEXT: testl $256, %eax # imm = 0x100
355
+ ; CHECK-NEXT: jne .LBB1_18
356
+ ; CHECK-NEXT: .LBB1_19: # %else23
357
+ ; CHECK-NEXT: testl $512, %eax # imm = 0x200
358
+ ; CHECK-NEXT: jne .LBB1_20
359
+ ; CHECK-NEXT: .LBB1_21: # %else26
360
+ ; CHECK-NEXT: testl $1024, %eax # imm = 0x400
361
+ ; CHECK-NEXT: jne .LBB1_22
362
+ ; CHECK-NEXT: .LBB1_23: # %else29
363
+ ; CHECK-NEXT: testl $2048, %eax # imm = 0x800
364
+ ; CHECK-NEXT: jne .LBB1_24
365
+ ; CHECK-NEXT: .LBB1_25: # %else32
366
+ ; CHECK-NEXT: testl $4096, %eax # imm = 0x1000
367
+ ; CHECK-NEXT: jne .LBB1_26
368
+ ; CHECK-NEXT: .LBB1_27: # %else35
369
+ ; CHECK-NEXT: testl $8192, %eax # imm = 0x2000
370
+ ; CHECK-NEXT: jne .LBB1_28
371
+ ; CHECK-NEXT: .LBB1_29: # %else38
372
+ ; CHECK-NEXT: testl $16384, %eax # imm = 0x4000
373
+ ; CHECK-NEXT: jne .LBB1_30
374
+ ; CHECK-NEXT: .LBB1_31: # %else41
375
+ ; CHECK-NEXT: testw %ax, %ax
376
+ ; CHECK-NEXT: js .LBB1_32
377
+ ; CHECK-NEXT: .LBB1_33: # %else44
378
+ ; CHECK-NEXT: testl $65536, %eax # imm = 0x10000
379
+ ; CHECK-NEXT: jne .LBB1_34
380
+ ; CHECK-NEXT: .LBB1_35: # %else47
381
+ ; CHECK-NEXT: testl $131072, %eax # imm = 0x20000
382
+ ; CHECK-NEXT: jne .LBB1_36
383
+ ; CHECK-NEXT: .LBB1_37: # %else50
384
+ ; CHECK-NEXT: testl $262144, %eax # imm = 0x40000
385
+ ; CHECK-NEXT: jne .LBB1_38
386
+ ; CHECK-NEXT: .LBB1_39: # %else53
387
+ ; CHECK-NEXT: testl $524288, %eax # imm = 0x80000
388
+ ; CHECK-NEXT: jne .LBB1_40
389
+ ; CHECK-NEXT: .LBB1_41: # %else56
390
+ ; CHECK-NEXT: testl $1048576, %eax # imm = 0x100000
391
+ ; CHECK-NEXT: jne .LBB1_42
392
+ ; CHECK-NEXT: .LBB1_43: # %else59
393
+ ; CHECK-NEXT: testl $2097152, %eax # imm = 0x200000
394
+ ; CHECK-NEXT: jne .LBB1_44
395
+ ; CHECK-NEXT: .LBB1_45: # %else62
396
+ ; CHECK-NEXT: testl $4194304, %eax # imm = 0x400000
397
+ ; CHECK-NEXT: jne .LBB1_46
398
+ ; CHECK-NEXT: .LBB1_47: # %else65
399
+ ; CHECK-NEXT: testl $8388608, %eax # imm = 0x800000
400
+ ; CHECK-NEXT: jne .LBB1_48
401
+ ; CHECK-NEXT: .LBB1_49: # %else68
402
+ ; CHECK-NEXT: testl $16777216, %eax # imm = 0x1000000
403
+ ; CHECK-NEXT: jne .LBB1_50
404
+ ; CHECK-NEXT: .LBB1_51: # %else71
405
+ ; CHECK-NEXT: testl $33554432, %eax # imm = 0x2000000
406
+ ; CHECK-NEXT: jne .LBB1_52
407
+ ; CHECK-NEXT: .LBB1_53: # %else74
408
+ ; CHECK-NEXT: testl $67108864, %eax # imm = 0x4000000
409
+ ; CHECK-NEXT: jne .LBB1_54
410
+ ; CHECK-NEXT: .LBB1_55: # %else77
411
+ ; CHECK-NEXT: testl $134217728, %eax # imm = 0x8000000
412
+ ; CHECK-NEXT: jne .LBB1_56
413
+ ; CHECK-NEXT: .LBB1_57: # %else80
414
+ ; CHECK-NEXT: testl $268435456, %eax # imm = 0x10000000
415
+ ; CHECK-NEXT: jne .LBB1_58
416
+ ; CHECK-NEXT: .LBB1_59: # %else83
417
+ ; CHECK-NEXT: testl $536870912, %eax # imm = 0x20000000
418
+ ; CHECK-NEXT: jne .LBB1_60
419
+ ; CHECK-NEXT: .LBB1_61: # %else86
420
+ ; CHECK-NEXT: testl $1073741824, %eax # imm = 0x40000000
421
+ ; CHECK-NEXT: jne .LBB1_62
422
+ ; CHECK-NEXT: .LBB1_63: # %else89
423
+ ; CHECK-NEXT: testl $-2147483648, %eax # imm = 0x80000000
424
+ ; CHECK-NEXT: jne .LBB1_64
425
+ ; CHECK-NEXT: .LBB1_65: # %else92
426
+ ; CHECK-NEXT: retq
427
+ ; CHECK-NEXT: .LBB1_6: # %cond.load4
428
+ ; CHECK-NEXT: vpbroadcastw 4(%rdi), %xmm1
429
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
430
+ ; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
431
+ ; CHECK-NEXT: testb $8, %al
432
+ ; CHECK-NEXT: je .LBB1_9
433
+ ; CHECK-NEXT: .LBB1_8: # %cond.load7
434
+ ; CHECK-NEXT: vpbroadcastw 6(%rdi), %xmm1
435
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
436
+ ; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
437
+ ; CHECK-NEXT: testb $16, %al
438
+ ; CHECK-NEXT: je .LBB1_11
439
+ ; CHECK-NEXT: .LBB1_10: # %cond.load10
440
+ ; CHECK-NEXT: vpbroadcastw 8(%rdi), %xmm1
441
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
442
+ ; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
443
+ ; CHECK-NEXT: testb $32, %al
444
+ ; CHECK-NEXT: je .LBB1_13
445
+ ; CHECK-NEXT: .LBB1_12: # %cond.load13
446
+ ; CHECK-NEXT: vpbroadcastw 10(%rdi), %xmm1
447
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
448
+ ; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
449
+ ; CHECK-NEXT: testb $64, %al
450
+ ; CHECK-NEXT: je .LBB1_15
451
+ ; CHECK-NEXT: .LBB1_14: # %cond.load16
452
+ ; CHECK-NEXT: vpbroadcastw 12(%rdi), %xmm1
453
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5],xmm1[6],xmm0[7]
454
+ ; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
455
+ ; CHECK-NEXT: testb %al, %al
456
+ ; CHECK-NEXT: jns .LBB1_17
457
+ ; CHECK-NEXT: .LBB1_16: # %cond.load19
458
+ ; CHECK-NEXT: vpbroadcastw 14(%rdi), %xmm1
459
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6],xmm1[7]
460
+ ; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
461
+ ; CHECK-NEXT: testl $256, %eax # imm = 0x100
462
+ ; CHECK-NEXT: je .LBB1_19
463
+ ; CHECK-NEXT: .LBB1_18: # %cond.load22
464
+ ; CHECK-NEXT: vpbroadcastw 16(%rdi), %ymm1
465
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
466
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
467
+ ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
468
+ ; CHECK-NEXT: testl $512, %eax # imm = 0x200
469
+ ; CHECK-NEXT: je .LBB1_21
470
+ ; CHECK-NEXT: .LBB1_20: # %cond.load25
471
+ ; CHECK-NEXT: vpbroadcastw 18(%rdi), %ymm1
472
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
473
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
474
+ ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
475
+ ; CHECK-NEXT: testl $1024, %eax # imm = 0x400
476
+ ; CHECK-NEXT: je .LBB1_23
477
+ ; CHECK-NEXT: .LBB1_22: # %cond.load28
478
+ ; CHECK-NEXT: vpbroadcastw 20(%rdi), %ymm1
479
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
480
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
481
+ ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
482
+ ; CHECK-NEXT: testl $2048, %eax # imm = 0x800
483
+ ; CHECK-NEXT: je .LBB1_25
484
+ ; CHECK-NEXT: .LBB1_24: # %cond.load31
485
+ ; CHECK-NEXT: vpbroadcastw 22(%rdi), %ymm1
486
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
487
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
488
+ ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
489
+ ; CHECK-NEXT: testl $4096, %eax # imm = 0x1000
490
+ ; CHECK-NEXT: je .LBB1_27
491
+ ; CHECK-NEXT: .LBB1_26: # %cond.load34
492
+ ; CHECK-NEXT: vpbroadcastw 24(%rdi), %ymm1
493
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7,8,9,10,11],ymm1[12],ymm0[13,14,15]
494
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
495
+ ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
496
+ ; CHECK-NEXT: testl $8192, %eax # imm = 0x2000
497
+ ; CHECK-NEXT: je .LBB1_29
498
+ ; CHECK-NEXT: .LBB1_28: # %cond.load37
499
+ ; CHECK-NEXT: vpbroadcastw 26(%rdi), %ymm1
500
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7,8,9,10,11,12],ymm1[13],ymm0[14,15]
501
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
502
+ ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
503
+ ; CHECK-NEXT: testl $16384, %eax # imm = 0x4000
504
+ ; CHECK-NEXT: je .LBB1_31
505
+ ; CHECK-NEXT: .LBB1_30: # %cond.load40
506
+ ; CHECK-NEXT: vpbroadcastw 28(%rdi), %ymm1
507
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6],ymm0[7,8,9,10,11,12,13],ymm1[14],ymm0[15]
508
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
509
+ ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
510
+ ; CHECK-NEXT: testw %ax, %ax
511
+ ; CHECK-NEXT: jns .LBB1_33
512
+ ; CHECK-NEXT: .LBB1_32: # %cond.load43
513
+ ; CHECK-NEXT: vpbroadcastw 30(%rdi), %ymm1
514
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
515
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
516
+ ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
517
+ ; CHECK-NEXT: testl $65536, %eax # imm = 0x10000
518
+ ; CHECK-NEXT: je .LBB1_35
519
+ ; CHECK-NEXT: .LBB1_34: # %cond.load46
520
+ ; CHECK-NEXT: vpbroadcastw 32(%rdi), %xmm1
521
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
522
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
523
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
524
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
525
+ ; CHECK-NEXT: testl $131072, %eax # imm = 0x20000
526
+ ; CHECK-NEXT: je .LBB1_37
527
+ ; CHECK-NEXT: .LBB1_36: # %cond.load49
528
+ ; CHECK-NEXT: vpbroadcastw 34(%rdi), %xmm1
529
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
530
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2,3,4,5,6,7]
531
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
532
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
533
+ ; CHECK-NEXT: testl $262144, %eax # imm = 0x40000
534
+ ; CHECK-NEXT: je .LBB1_39
535
+ ; CHECK-NEXT: .LBB1_38: # %cond.load52
536
+ ; CHECK-NEXT: vpbroadcastw 36(%rdi), %xmm1
537
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
538
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2],xmm2[3,4,5,6,7]
539
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
540
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
541
+ ; CHECK-NEXT: testl $524288, %eax # imm = 0x80000
542
+ ; CHECK-NEXT: je .LBB1_41
543
+ ; CHECK-NEXT: .LBB1_40: # %cond.load55
544
+ ; CHECK-NEXT: vpbroadcastw 38(%rdi), %xmm1
545
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
546
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[3],xmm2[4,5,6,7]
547
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
548
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
549
+ ; CHECK-NEXT: testl $1048576, %eax # imm = 0x100000
550
+ ; CHECK-NEXT: je .LBB1_43
551
+ ; CHECK-NEXT: .LBB1_42: # %cond.load58
552
+ ; CHECK-NEXT: vpbroadcastw 40(%rdi), %xmm1
553
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
554
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4],xmm2[5,6,7]
555
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
556
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
557
+ ; CHECK-NEXT: testl $2097152, %eax # imm = 0x200000
558
+ ; CHECK-NEXT: je .LBB1_45
559
+ ; CHECK-NEXT: .LBB1_44: # %cond.load61
560
+ ; CHECK-NEXT: vpbroadcastw 42(%rdi), %xmm1
561
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
562
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4],xmm1[5],xmm2[6,7]
563
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
564
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
565
+ ; CHECK-NEXT: testl $4194304, %eax # imm = 0x400000
566
+ ; CHECK-NEXT: je .LBB1_47
567
+ ; CHECK-NEXT: .LBB1_46: # %cond.load64
568
+ ; CHECK-NEXT: vpbroadcastw 44(%rdi), %xmm1
569
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
570
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7]
571
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
572
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
573
+ ; CHECK-NEXT: testl $8388608, %eax # imm = 0x800000
574
+ ; CHECK-NEXT: je .LBB1_49
575
+ ; CHECK-NEXT: .LBB1_48: # %cond.load67
576
+ ; CHECK-NEXT: vpbroadcastw 46(%rdi), %xmm1
577
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
578
+ ; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
579
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
580
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
581
+ ; CHECK-NEXT: testl $16777216, %eax # imm = 0x1000000
582
+ ; CHECK-NEXT: je .LBB1_51
583
+ ; CHECK-NEXT: .LBB1_50: # %cond.load70
584
+ ; CHECK-NEXT: vpbroadcastw 48(%rdi), %ymm1
585
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
586
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7],ymm1[8],ymm2[9,10,11,12,13,14,15]
587
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
588
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
589
+ ; CHECK-NEXT: testl $33554432, %eax # imm = 0x2000000
590
+ ; CHECK-NEXT: je .LBB1_53
591
+ ; CHECK-NEXT: .LBB1_52: # %cond.load73
592
+ ; CHECK-NEXT: vpbroadcastw 50(%rdi), %ymm1
593
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
594
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3,4,5,6,7,8],ymm1[9],ymm2[10,11,12,13,14,15]
595
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
596
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
597
+ ; CHECK-NEXT: testl $67108864, %eax # imm = 0x4000000
598
+ ; CHECK-NEXT: je .LBB1_55
599
+ ; CHECK-NEXT: .LBB1_54: # %cond.load76
600
+ ; CHECK-NEXT: vpbroadcastw 52(%rdi), %ymm1
601
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
602
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1],ymm1[2],ymm2[3,4,5,6,7,8,9],ymm1[10],ymm2[11,12,13,14,15]
603
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
604
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
605
+ ; CHECK-NEXT: testl $134217728, %eax # imm = 0x8000000
606
+ ; CHECK-NEXT: je .LBB1_57
607
+ ; CHECK-NEXT: .LBB1_56: # %cond.load79
608
+ ; CHECK-NEXT: vpbroadcastw 54(%rdi), %ymm1
609
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
610
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6,7,8,9,10],ymm1[11],ymm2[12,13,14,15]
611
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
612
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
613
+ ; CHECK-NEXT: testl $268435456, %eax # imm = 0x10000000
614
+ ; CHECK-NEXT: je .LBB1_59
615
+ ; CHECK-NEXT: .LBB1_58: # %cond.load82
616
+ ; CHECK-NEXT: vpbroadcastw 56(%rdi), %ymm1
617
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
618
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4],ymm2[5,6,7,8,9,10,11],ymm1[12],ymm2[13,14,15]
619
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
620
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
621
+ ; CHECK-NEXT: testl $536870912, %eax # imm = 0x20000000
622
+ ; CHECK-NEXT: je .LBB1_61
623
+ ; CHECK-NEXT: .LBB1_60: # %cond.load85
624
+ ; CHECK-NEXT: vpbroadcastw 58(%rdi), %ymm1
625
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
626
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4],ymm1[5],ymm2[6,7,8,9,10,11,12],ymm1[13],ymm2[14,15]
627
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
628
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
629
+ ; CHECK-NEXT: testl $1073741824, %eax # imm = 0x40000000
630
+ ; CHECK-NEXT: je .LBB1_63
631
+ ; CHECK-NEXT: .LBB1_62: # %cond.load88
632
+ ; CHECK-NEXT: vpbroadcastw 60(%rdi), %ymm1
633
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
634
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5],ymm1[6],ymm2[7,8,9,10,11,12,13],ymm1[14],ymm2[15]
635
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
636
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
637
+ ; CHECK-NEXT: testl $-2147483648, %eax # imm = 0x80000000
638
+ ; CHECK-NEXT: je .LBB1_65
639
+ ; CHECK-NEXT: .LBB1_64: # %cond.load91
640
+ ; CHECK-NEXT: vpbroadcastw 62(%rdi), %ymm1
641
+ ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
642
+ ; CHECK-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7],ymm2[8,9,10,11,12,13,14],ymm1[15]
643
+ ; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
644
+ ; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
645
+ ; CHECK-NEXT: retq
646
+ %1 = call <32 x half > @llvm.masked.load.v32f16.p0 (ptr %p , i32 2 , <32 x i1 > %mask , <32 x half > <half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 , half 2 .0 >)
647
+ ret <32 x half > %1
648
+ }
649
+
311
650
; Declaration of the masked-load intrinsic that @build_vec calls. Returns <32 x half>.
; Operands in the order used at the call site: source pointer, i32 alignment,
; <32 x i1> per-lane mask, <32 x half> passthru supplying the lanes that are not loaded.
declare <32 x half > @llvm.masked.load.v32f16.p0 (ptr , i32 , <32 x i1 >, <32 x half >)
0 commit comments