Skip to content

Commit f6f1bb6

Browse files
Author: Igor Breger

    AVX512: Implemented intrinsics for valign.

    Differential Revision: http://reviews.llvm.org/D12526

    llvm-svn: 246551

1 parent 692cbb5 · commit f6f1bb6

File tree

3 files changed

+107
-3
lines changed

3 files changed

+107
-3
lines changed

llvm/include/llvm/IR/IntrinsicsX86.td

Lines changed: 26 additions & 3 deletions
@@ -1425,7 +1425,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
                    [IntrNoMem]>;
 }
 
-
   // Vector blend
 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
   def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
@@ -5662,11 +5661,35 @@ let TargetPrefix = "x86" in {
                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
                     llvm_i16_ty], [IntrNoMem]>;
 
+  def int_x86_avx512_mask_valign_q_256 :
+         GCCBuiltin<"__builtin_ia32_alignq256_mask">,
+         Intrinsic<[llvm_v4i64_ty],
+         [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty,
+          llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_valign_d_256 :
+         GCCBuiltin<"__builtin_ia32_alignd256_mask">,
+         Intrinsic<[llvm_v8i32_ty],
+         [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty,
+          llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_valign_q_128 :
+         GCCBuiltin<"__builtin_ia32_alignq128_mask">,
+         Intrinsic<[llvm_v2i64_ty],
+         [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty,
+          llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_valign_d_128 :
+         GCCBuiltin<"__builtin_ia32_alignd128_mask">,
+         Intrinsic<[llvm_v4i32_ty],
+         [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+          llvm_i8_ty], [IntrNoMem]>;
+
   def int_x86_avx512_mask_palignr_128 :
          GCCBuiltin<"__builtin_ia32_palignr128_mask">,
          Intrinsic<[llvm_v16i8_ty],
-         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty,
-          llvm_i16_ty], [IntrNoMem]>;
+         [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty,
+          llvm_i16_ty], [IntrNoMem]>;
 
   def int_x86_avx512_mask_palignr_256 :
          GCCBuiltin<"__builtin_ia32_palignr256_mask">,

llvm/lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 8 additions & 0 deletions
@@ -1211,8 +1211,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::UNPCKL, 0),
   X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK,
                      X86ISD::UNPCKL, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_d_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_d_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
   X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
                      X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_q_128, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
+  X86_INTRINSIC_DATA(avx512_mask_valign_q_256, INTR_TYPE_3OP_IMM8_MASK,
+                     X86ISD::VALIGN, 0),
   X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
                      X86ISD::VALIGN, 0),
   X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),

llvm/test/CodeGen/X86/avx512vl-intrinsics.ll

Lines changed: 73 additions & 0 deletions
@@ -4507,3 +4507,76 @@ define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8
   %res2 = fadd <8 x float> %res, %res1
   ret <8 x float> %res2
 }
+
+declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    valignd $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm3, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res3, %res2
+  ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    valignd $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    valignd $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    valignq $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    valignq $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
+  %res2 = add <2 x i64> %res, %res1
+  ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    valignq $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    valignq $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
+  %res2 = add <4 x i64> %res, %res1
+  ret <4 x i64> %res2
+}

0 commit comments

Comments
 (0)