@@ -4507,3 +4507,76 @@ define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8
   %res2 = fadd <8 x float> %res, %res1
   ret <8 x float> %res2
 }
+
+declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm3, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res3, %res2
+  ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    valignd $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    valignd $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    valignq $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    valignq $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
+  %res2 = add <2 x i64> %res, %res1
+  ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    valignq $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    valignq $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
+  %res2 = add <4 x i64> %res, %res1
+  ret <4 x i64> %res2
+}
+
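For reference, the sketch below drives the same operation from C through the AVX512VL `alignr_epi32` compiler intrinsics (`_mm_alignr_epi32`, `_mm_mask_alignr_epi32`, `_mm_maskz_alignr_epi32` from `immintrin.h`), covering the unmasked, merge-masked, and zero-masked forms that the new test cases check. The driver and the assumption that the front end lowers these intrinsics to `llvm.x86.avx512.mask.valign.d.128` are illustrative only and not part of the patch; it needs `-mavx512f -mavx512vl` to compile.

```c
#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m128i a = _mm_setr_epi32(10, 11, 12, 13);   /* high half of the concatenation */
    __m128i b = _mm_setr_epi32(0, 1, 2, 3);       /* low half of the concatenation  */
    __m128i passthru = _mm_set1_epi32(-1);
    __mmask8 k = 0x5;                             /* keep lanes 0 and 2 */

    /* Shift the 8-dword concatenation a:b right by one 32-bit element
       and keep the low 4 dwords: {1, 2, 3, 10}. */
    __m128i plain  = _mm_alignr_epi32(a, b, 1);

    /* Merge-masked form: masked-off lanes come from passthru -> {1, -1, 3, -1}. */
    __m128i merged = _mm_mask_alignr_epi32(passthru, k, a, b, 1);

    /* Zero-masked form: masked-off lanes become zero -> {1, 0, 3, 0}. */
    __m128i zeroed = _mm_maskz_alignr_epi32(k, a, b, 1);

    int out[4];
    _mm_storeu_si128((__m128i *)out, merged);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
    (void)plain;
    (void)zeroed;
    return 0;
}
```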