Skip to content

Commit ea81e31

Browse files
authored
[X86][AVX10] Allow AVX10 to use VBMI2 instructions (llvm#73276)
1 parent 8c2537f commit ea81e31

File tree

5 files changed

+232
-170
lines changed

5 files changed

+232
-170
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1970,20 +1970,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
19701970
}
19711971

19721972
if (Subtarget.hasVBMI2()) {
1973-
for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1974-
MVT::v16i16, MVT::v8i32, MVT::v4i64,
1975-
MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1973+
for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
19761974
setOperationAction(ISD::FSHL, VT, Custom);
19771975
setOperationAction(ISD::FSHR, VT, Custom);
19781976
}
19791977

19801978
setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1981-
setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1982-
setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
19831979
setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
19841980
}
19851981
}// useAVX512Regs
19861982

1983+
if (Subtarget.hasVBMI2()) {
1984+
for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,
1985+
MVT::v4i64}) {
1986+
setOperationAction(ISD::FSHL, VT, Custom);
1987+
setOperationAction(ISD::FSHR, VT, Custom);
1988+
}
1989+
}
1990+
19871991
// This block controls legalization for operations that don't have
19881992
// pre-AVX512 equivalents. Without VLX we use 512-bit operations for
19891993
// narrower widths.

llvm/test/CodeGen/X86/vector-fshl-128.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2
1010
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
1111
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2
12+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX512VLVBMI2
13+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX512VLVBMI2
1214
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
1315
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
1416

llvm/test/CodeGen/X86/vector-fshl-256.ll

Lines changed: 116 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,9 @@
66
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
77
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2
88
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
9-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2
9+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2
10+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX10,AVX10_256
11+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX10,AVX512VLVBMI2
1012
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1
1113
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2
1214

@@ -116,10 +118,10 @@ define <4 x i64> @var_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %amt)
116118
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
117119
; AVX512VLBW-NEXT: retq
118120
;
119-
; AVX512VLVBMI2-LABEL: var_funnnel_v4i64:
120-
; AVX512VLVBMI2: # %bb.0:
121-
; AVX512VLVBMI2-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
122-
; AVX512VLVBMI2-NEXT: retq
121+
; AVX10-LABEL: var_funnnel_v4i64:
122+
; AVX10: # %bb.0:
123+
; AVX10-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
124+
; AVX10-NEXT: retq
123125
;
124126
; XOPAVX1-LABEL: var_funnnel_v4i64:
125127
; XOPAVX1: # %bb.0:
@@ -271,10 +273,10 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %amt)
271273
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
272274
; AVX512VLBW-NEXT: retq
273275
;
274-
; AVX512VLVBMI2-LABEL: var_funnnel_v8i32:
275-
; AVX512VLVBMI2: # %bb.0:
276-
; AVX512VLVBMI2-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
277-
; AVX512VLVBMI2-NEXT: retq
276+
; AVX10-LABEL: var_funnnel_v8i32:
277+
; AVX10: # %bb.0:
278+
; AVX10-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
279+
; AVX10-NEXT: retq
278280
;
279281
; XOPAVX1-LABEL: var_funnnel_v8i32:
280282
; XOPAVX1: # %bb.0:
@@ -424,10 +426,10 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %
424426
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
425427
; AVX512VLBW-NEXT: retq
426428
;
427-
; AVX512VLVBMI2-LABEL: var_funnnel_v16i16:
428-
; AVX512VLVBMI2: # %bb.0:
429-
; AVX512VLVBMI2-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
430-
; AVX512VLVBMI2-NEXT: retq
429+
; AVX10-LABEL: var_funnnel_v16i16:
430+
; AVX10: # %bb.0:
431+
; AVX10-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
432+
; AVX10-NEXT: retq
431433
;
432434
; XOPAVX1-LABEL: var_funnnel_v16i16:
433435
; XOPAVX1: # %bb.0:
@@ -691,6 +693,21 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt)
691693
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
692694
; AVX512VLVBMI2-NEXT: retq
693695
;
696+
; AVX10_256-LABEL: var_funnnel_v32i8:
697+
; AVX10_256: # %bb.0:
698+
; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
699+
; AVX10_256-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm2
700+
; AVX10_256-NEXT: vpxor %xmm4, %xmm4, %xmm4
701+
; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31]
702+
; AVX10_256-NEXT: vpsllvw %ymm5, %ymm3, %ymm3
703+
; AVX10_256-NEXT: vpsrlw $8, %ymm3, %ymm3
704+
; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
705+
; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23]
706+
; AVX10_256-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
707+
; AVX10_256-NEXT: vpsrlw $8, %ymm0, %ymm0
708+
; AVX10_256-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
709+
; AVX10_256-NEXT: retq
710+
;
694711
; XOPAVX1-LABEL: var_funnnel_v32i8:
695712
; XOPAVX1: # %bb.0:
696713
; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
@@ -824,11 +841,11 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %
824841
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
825842
; AVX512VLBW-NEXT: retq
826843
;
827-
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i64:
828-
; AVX512VLVBMI2: # %bb.0:
829-
; AVX512VLVBMI2-NEXT: vpbroadcastq %xmm2, %ymm2
830-
; AVX512VLVBMI2-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
831-
; AVX512VLVBMI2-NEXT: retq
844+
; AVX10-LABEL: splatvar_funnnel_v4i64:
845+
; AVX10: # %bb.0:
846+
; AVX10-NEXT: vpbroadcastq %xmm2, %ymm2
847+
; AVX10-NEXT: vpshldvq %ymm2, %ymm1, %ymm0
848+
; AVX10-NEXT: retq
832849
;
833850
; XOPAVX1-LABEL: splatvar_funnnel_v4i64:
834851
; XOPAVX1: # %bb.0:
@@ -942,11 +959,11 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
942959
; AVX512VLBW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm3[1,3],ymm0[5,7],ymm3[5,7]
943960
; AVX512VLBW-NEXT: retq
944961
;
945-
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i32:
946-
; AVX512VLVBMI2: # %bb.0:
947-
; AVX512VLVBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
948-
; AVX512VLVBMI2-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
949-
; AVX512VLVBMI2-NEXT: retq
962+
; AVX10-LABEL: splatvar_funnnel_v8i32:
963+
; AVX10: # %bb.0:
964+
; AVX10-NEXT: vpbroadcastd %xmm2, %ymm2
965+
; AVX10-NEXT: vpshldvd %ymm2, %ymm1, %ymm0
966+
; AVX10-NEXT: retq
950967
;
951968
; XOPAVX1-LABEL: splatvar_funnnel_v8i32:
952969
; XOPAVX1: # %bb.0:
@@ -1063,11 +1080,11 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
10631080
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
10641081
; AVX512VLBW-NEXT: retq
10651082
;
1066-
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i16:
1067-
; AVX512VLVBMI2: # %bb.0:
1068-
; AVX512VLVBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
1069-
; AVX512VLVBMI2-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
1070-
; AVX512VLVBMI2-NEXT: retq
1083+
; AVX10-LABEL: splatvar_funnnel_v16i16:
1084+
; AVX10: # %bb.0:
1085+
; AVX10-NEXT: vpbroadcastw %xmm2, %ymm2
1086+
; AVX10-NEXT: vpshldvw %ymm2, %ymm1, %ymm0
1087+
; AVX10-NEXT: retq
10711088
;
10721089
; XOPAVX1-LABEL: splatvar_funnnel_v16i16:
10731090
; XOPAVX1: # %bb.0:
@@ -1197,17 +1214,17 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
11971214
; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
11981215
; AVX512VLBW-NEXT: retq
11991216
;
1200-
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
1201-
; AVX512VLVBMI2: # %bb.0:
1202-
; AVX512VLVBMI2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1203-
; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1204-
; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm3, %ymm3
1205-
; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm3, %ymm3
1206-
; AVX512VLVBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1207-
; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0
1208-
; AVX512VLVBMI2-NEXT: vpsrlw $8, %ymm0, %ymm0
1209-
; AVX512VLVBMI2-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
1210-
; AVX512VLVBMI2-NEXT: retq
1217+
; AVX10-LABEL: splatvar_funnnel_v32i8:
1218+
; AVX10: # %bb.0:
1219+
; AVX10-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1220+
; AVX10-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1221+
; AVX10-NEXT: vpsllw %xmm2, %ymm3, %ymm3
1222+
; AVX10-NEXT: vpsrlw $8, %ymm3, %ymm3
1223+
; AVX10-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1224+
; AVX10-NEXT: vpsllw %xmm2, %ymm0, %ymm0
1225+
; AVX10-NEXT: vpsrlw $8, %ymm0, %ymm0
1226+
; AVX10-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
1227+
; AVX10-NEXT: retq
12111228
;
12121229
; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
12131230
; XOPAVX1: # %bb.0:
@@ -1438,25 +1455,25 @@ define void @fancierRotate2(ptr %arr, ptr %control, i32 %rot0, i32 %rot1) {
14381455
; AVX512VLBW-NEXT: vzeroupper
14391456
; AVX512VLBW-NEXT: retq
14401457
;
1441-
; AVX512VLVBMI2-LABEL: fancierRotate2:
1442-
; AVX512VLVBMI2: # %bb.0: # %entry
1443-
; AVX512VLVBMI2-NEXT: vpbroadcastd %edx, %ymm0
1444-
; AVX512VLVBMI2-NEXT: vpbroadcastd %ecx, %ymm1
1445-
; AVX512VLVBMI2-NEXT: movq $-1024, %rax # imm = 0xFC00
1446-
; AVX512VLVBMI2-NEXT: .p2align 4, 0x90
1447-
; AVX512VLVBMI2-NEXT: .LBB8_1: # %loop
1448-
; AVX512VLVBMI2-NEXT: # =>This Inner Loop Header: Depth=1
1449-
; AVX512VLVBMI2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
1450-
; AVX512VLVBMI2-NEXT: vptestnmb %xmm2, %xmm2, %k1
1451-
; AVX512VLVBMI2-NEXT: vpblendmd %ymm0, %ymm1, %ymm2 {%k1}
1452-
; AVX512VLVBMI2-NEXT: vmovdqu 4096(%rdi,%rax,4), %ymm3
1453-
; AVX512VLVBMI2-NEXT: vprolvd %ymm2, %ymm3, %ymm2
1454-
; AVX512VLVBMI2-NEXT: vmovdqu %ymm2, 4096(%rdi,%rax,4)
1455-
; AVX512VLVBMI2-NEXT: addq $8, %rax
1456-
; AVX512VLVBMI2-NEXT: jne .LBB8_1
1457-
; AVX512VLVBMI2-NEXT: # %bb.2: # %exit
1458-
; AVX512VLVBMI2-NEXT: vzeroupper
1459-
; AVX512VLVBMI2-NEXT: retq
1458+
; AVX10-LABEL: fancierRotate2:
1459+
; AVX10: # %bb.0: # %entry
1460+
; AVX10-NEXT: vpbroadcastd %edx, %ymm0
1461+
; AVX10-NEXT: vpbroadcastd %ecx, %ymm1
1462+
; AVX10-NEXT: movq $-1024, %rax # imm = 0xFC00
1463+
; AVX10-NEXT: .p2align 4, 0x90
1464+
; AVX10-NEXT: .LBB8_1: # %loop
1465+
; AVX10-NEXT: # =>This Inner Loop Header: Depth=1
1466+
; AVX10-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
1467+
; AVX10-NEXT: vptestnmb %xmm2, %xmm2, %k1
1468+
; AVX10-NEXT: vpblendmd %ymm0, %ymm1, %ymm2 {%k1}
1469+
; AVX10-NEXT: vmovdqu 4096(%rdi,%rax,4), %ymm3
1470+
; AVX10-NEXT: vprolvd %ymm2, %ymm3, %ymm2
1471+
; AVX10-NEXT: vmovdqu %ymm2, 4096(%rdi,%rax,4)
1472+
; AVX10-NEXT: addq $8, %rax
1473+
; AVX10-NEXT: jne .LBB8_1
1474+
; AVX10-NEXT: # %bb.2: # %exit
1475+
; AVX10-NEXT: vzeroupper
1476+
; AVX10-NEXT: retq
14601477
;
14611478
; XOPAVX1-LABEL: fancierRotate2:
14621479
; XOPAVX1: # %bb.0: # %entry
@@ -1609,10 +1626,10 @@ define <4 x i64> @constant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
16091626
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
16101627
; AVX512VLBW-NEXT: retq
16111628
;
1612-
; AVX512VLVBMI2-LABEL: constant_funnnel_v4i64:
1613-
; AVX512VLVBMI2: # %bb.0:
1614-
; AVX512VLVBMI2-NEXT: vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1615-
; AVX512VLVBMI2-NEXT: retq
1629+
; AVX10-LABEL: constant_funnnel_v4i64:
1630+
; AVX10: # %bb.0:
1631+
; AVX10-NEXT: vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1632+
; AVX10-NEXT: retq
16161633
;
16171634
; XOPAVX1-LABEL: constant_funnnel_v4i64:
16181635
; XOPAVX1: # %bb.0:
@@ -1707,10 +1724,10 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
17071724
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
17081725
; AVX512VLBW-NEXT: retq
17091726
;
1710-
; AVX512VLVBMI2-LABEL: constant_funnnel_v8i32:
1711-
; AVX512VLVBMI2: # %bb.0:
1712-
; AVX512VLVBMI2-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1713-
; AVX512VLVBMI2-NEXT: retq
1727+
; AVX10-LABEL: constant_funnnel_v8i32:
1728+
; AVX10: # %bb.0:
1729+
; AVX10-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1730+
; AVX10-NEXT: retq
17141731
;
17151732
; XOPAVX1-LABEL: constant_funnnel_v8i32:
17161733
; XOPAVX1: # %bb.0:
@@ -1810,10 +1827,10 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
18101827
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
18111828
; AVX512VLBW-NEXT: retq
18121829
;
1813-
; AVX512VLVBMI2-LABEL: constant_funnnel_v16i16:
1814-
; AVX512VLVBMI2: # %bb.0:
1815-
; AVX512VLVBMI2-NEXT: vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1816-
; AVX512VLVBMI2-NEXT: retq
1830+
; AVX10-LABEL: constant_funnnel_v16i16:
1831+
; AVX10: # %bb.0:
1832+
; AVX10-NEXT: vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
1833+
; AVX10-NEXT: retq
18171834
;
18181835
; XOPAVX1-LABEL: constant_funnnel_v16i16:
18191836
; XOPAVX1: # %bb.0:
@@ -1944,6 +1961,17 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
19441961
; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0
19451962
; AVX512VLVBMI2-NEXT: retq
19461963
;
1964+
; AVX10_256-LABEL: constant_funnnel_v32i8:
1965+
; AVX10_256: # %bb.0:
1966+
; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1967+
; AVX10_256-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
1968+
; AVX10_256-NEXT: vpsrlw $8, %ymm2, %ymm2
1969+
; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1970+
; AVX10_256-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1971+
; AVX10_256-NEXT: vpsrlw $8, %ymm0, %ymm0
1972+
; AVX10_256-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
1973+
; AVX10_256-NEXT: retq
1974+
;
19471975
; XOPAVX1-LABEL: constant_funnnel_v32i8:
19481976
; XOPAVX1: # %bb.0:
19491977
; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -2044,10 +2072,10 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi
20442072
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
20452073
; AVX512VLBW-NEXT: retq
20462074
;
2047-
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64:
2048-
; AVX512VLVBMI2: # %bb.0:
2049-
; AVX512VLVBMI2-NEXT: vpshldq $14, %ymm1, %ymm0, %ymm0
2050-
; AVX512VLVBMI2-NEXT: retq
2075+
; AVX10-LABEL: splatconstant_funnnel_v4i64:
2076+
; AVX10: # %bb.0:
2077+
; AVX10-NEXT: vpshldq $14, %ymm1, %ymm0, %ymm0
2078+
; AVX10-NEXT: retq
20512079
;
20522080
; XOPAVX1-LABEL: splatconstant_funnnel_v4i64:
20532081
; XOPAVX1: # %bb.0:
@@ -2129,10 +2157,10 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi
21292157
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
21302158
; AVX512VLBW-NEXT: retq
21312159
;
2132-
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32:
2133-
; AVX512VLVBMI2: # %bb.0:
2134-
; AVX512VLVBMI2-NEXT: vpshldd $4, %ymm1, %ymm0, %ymm0
2135-
; AVX512VLVBMI2-NEXT: retq
2160+
; AVX10-LABEL: splatconstant_funnnel_v8i32:
2161+
; AVX10: # %bb.0:
2162+
; AVX10-NEXT: vpshldd $4, %ymm1, %ymm0, %ymm0
2163+
; AVX10-NEXT: retq
21362164
;
21372165
; XOPAVX1-LABEL: splatconstant_funnnel_v8i32:
21382166
; XOPAVX1: # %bb.0:
@@ -2214,10 +2242,10 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no
22142242
; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
22152243
; AVX512VLBW-NEXT: retq
22162244
;
2217-
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16:
2218-
; AVX512VLVBMI2: # %bb.0:
2219-
; AVX512VLVBMI2-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0
2220-
; AVX512VLVBMI2-NEXT: retq
2245+
; AVX10-LABEL: splatconstant_funnnel_v16i16:
2246+
; AVX10: # %bb.0:
2247+
; AVX10-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0
2248+
; AVX10-NEXT: retq
22212249
;
22222250
; XOPAVX1-LABEL: splatconstant_funnnel_v16i16:
22232251
; XOPAVX1: # %bb.0:
@@ -2309,12 +2337,12 @@ define <32 x i8> @splatconstant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwi
23092337
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
23102338
; AVX512VLBW-NEXT: retq
23112339
;
2312-
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
2313-
; AVX512VLVBMI2: # %bb.0:
2314-
; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
2315-
; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
2316-
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
2317-
; AVX512VLVBMI2-NEXT: retq
2340+
; AVX10-LABEL: splatconstant_funnnel_v32i8:
2341+
; AVX10: # %bb.0:
2342+
; AVX10-NEXT: vpsllw $4, %ymm0, %ymm2
2343+
; AVX10-NEXT: vpsrlw $4, %ymm1, %ymm0
2344+
; AVX10-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
2345+
; AVX10-NEXT: retq
23182346
;
23192347
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
23202348
; XOPAVX1: # %bb.0:

llvm/test/CodeGen/X86/vector-fshr-128.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2 | FileCheck %s --check-prefixes=AVX512VBMI2
1010
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VLBW
1111
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=AVX512VLVBMI2
12+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-256 | FileCheck %s --check-prefixes=AVX512VLVBMI2
13+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 | FileCheck %s --check-prefixes=AVX512VLVBMI2
1214
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
1315
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
1416

0 commit comments

Comments
 (0)