Skip to content

Commit d01e0f7

Browse files
committed
[InstCombine][X86] Add vpermv/vpermv3 test coverage for #106413
1 parent 82a5ab7 commit d01e0f7

File tree

2 files changed

+310
-0
lines changed

2 files changed

+310
-0
lines changed

llvm/test/Transforms/InstCombine/X86/x86-vperm.ll

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,17 @@ define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passt
8989
ret <8 x i32> %3
9090
}
9191

92+
; Demanded-bits coverage for avx2.permd: the index operand is %a1 or'ed with
; constants whose low 3 bits are all zero. The expected output below keeps both
; the or and the intrinsic call unchanged.
; Presumably only the low 3 bits of each index select one of the 8 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <8 x i32> @demandedbit_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %a1) {
93+
; CHECK-LABEL: @demandedbit_test_permvar_si_256_mask(
94+
; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[A1:%.*]], <i32 0, i32 8, i32 -8, i32 16, i32 -16, i32 32, i32 -32, i32 64>
95+
; CHECK-NEXT: [[S:%.*]] = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A0:%.*]], <8 x i32> [[M]])
96+
; CHECK-NEXT: ret <8 x i32> [[S]]
97+
;
98+
%m = or <8 x i32> %a1, <i32 0, i32 8, i32 -8, i32 16, i32 -16, i32 32, i32 -32, i32 64>
99+
%s = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %m)
100+
ret <8 x i32> %s
101+
}
102+
92103
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
93104

94105
define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) {
@@ -177,6 +188,17 @@ define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float>
177188
ret <8 x float> %3
178189
}
179190

191+
; Demanded-bits coverage for avx2.permps: the <8 x i32> index operand is %a1
; or'ed with constants whose low 3 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; Presumably only the low 3 bits of each index select one of the 8 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <8 x float> @demandedbit_test_permvar_sf_256_mask(<8 x float> %a0, <8 x i32> %a1) {
192+
; CHECK-LABEL: @demandedbit_test_permvar_sf_256_mask(
193+
; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[A1:%.*]], <i32 0, i32 8, i32 -8, i32 16, i32 -16, i32 32, i32 -32, i32 64>
194+
; CHECK-NEXT: [[S:%.*]] = call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[M]])
195+
; CHECK-NEXT: ret <8 x float> [[S]]
196+
;
197+
%m = or <8 x i32> %a1, <i32 0, i32 8, i32 -8, i32 16, i32 -16, i32 32, i32 -32, i32 64>
198+
%s = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %m)
199+
ret <8 x float> %s
200+
}
201+
180202
declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
181203

182204
define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
@@ -273,6 +295,17 @@ define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passt
273295
ret <4 x i64> %3
274296
}
275297

298+
; Demanded-bits coverage for avx512.permvar.di.256: the index operand is %a1
; or'ed with constants whose low 2 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; Presumably only the low 2 bits of each index select one of the 4 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <4 x i64> @demandedbits_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %a1) {
299+
; CHECK-LABEL: @demandedbits_test_permvar_di_256_mask(
300+
; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[A1:%.*]], <i64 0, i64 4, i64 -4, i64 8>
301+
; CHECK-NEXT: [[S:%.*]] = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> [[A0:%.*]], <4 x i64> [[M]])
302+
; CHECK-NEXT: ret <4 x i64> [[S]]
303+
;
304+
%m = or <4 x i64> %a1, <i64 0, i64 4, i64 -4, i64 8>
305+
%s = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> %m)
306+
ret <4 x i64> %s
307+
}
308+
276309
declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
277310

278311
define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
@@ -369,6 +402,17 @@ define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x doubl
369402
ret <4 x double> %3
370403
}
371404

405+
; Demanded-bits coverage for avx512.permvar.df.256: the <4 x i64> index operand
; is %a1 or'ed with constants whose low 2 bits are all zero. The expected output
; below keeps both the or and the intrinsic call unchanged.
; Presumably only the low 2 bits of each index select one of the 4 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <4 x double> @demandedbits_test_permvar_df_256_mask(<4 x double> %a0, <4 x i64> %a1) {
406+
; CHECK-LABEL: @demandedbits_test_permvar_df_256_mask(
407+
; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[A1:%.*]], <i64 0, i64 4, i64 -4, i64 8>
408+
; CHECK-NEXT: [[S:%.*]] = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> [[A0:%.*]], <4 x i64> [[M]])
409+
; CHECK-NEXT: ret <4 x double> [[S]]
410+
;
411+
%m = or <4 x i64> %a1, <i64 0, i64 4, i64 -4, i64 8>
412+
%s = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> %m)
413+
ret <4 x double> %s
414+
}
415+
372416
declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
373417

374418
define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
@@ -457,6 +501,17 @@ define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %pa
457501
ret <16 x i32> %3
458502
}
459503

504+
; Demanded-bits coverage for avx512.permvar.si.512: the index operand is %a1
; or'ed with constants whose low 4 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; Presumably only the low 4 bits of each index select one of the 16 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <16 x i32> @demandedbit_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %a1) {
505+
; CHECK-LABEL: @demandedbit_test_permvar_si_512_mask(
506+
; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[A1:%.*]], <i32 0, i32 16, i32 -16, i32 32, i32 -32, i32 64, i32 -64, i32 128, i32 -128, i32 256, i32 -256, i32 512, i32 -512, i32 1024, i32 -1024, i32 2048>
507+
; CHECK-NEXT: [[S:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[A0:%.*]], <16 x i32> [[M]])
508+
; CHECK-NEXT: ret <16 x i32> [[S]]
509+
;
510+
%m = or <16 x i32> %a1, <i32 0, i32 16, i32 -16, i32 32, i32 -32, i32 64, i32 -64, i32 128, i32 -128, i32 256, i32 -256, i32 512, i32 -512, i32 1024, i32 -1024, i32 2048>
511+
%s = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> %m)
512+
ret <16 x i32> %s
513+
}
514+
460515
declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
461516

462517
define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
@@ -545,6 +600,17 @@ define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x floa
545600
ret <16 x float> %3
546601
}
547602

603+
; Demanded-bits coverage for avx512.permvar.sf.512: the <16 x i32> index operand
; is %a1 or'ed with constants whose low 4 bits are all zero. The expected output
; below keeps both the or and the intrinsic call unchanged.
; Presumably only the low 4 bits of each index select one of the 16 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <16 x float> @demandedbit_test_permvar_sf_512_mask(<16 x float> %a0, <16 x i32> %a1) {
604+
; CHECK-LABEL: @demandedbit_test_permvar_sf_512_mask(
605+
; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[A1:%.*]], <i32 0, i32 16, i32 -16, i32 32, i32 -32, i32 64, i32 -64, i32 128, i32 -128, i32 256, i32 -256, i32 512, i32 -512, i32 1024, i32 -1024, i32 2048>
606+
; CHECK-NEXT: [[S:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[A0:%.*]], <16 x i32> [[M]])
607+
; CHECK-NEXT: ret <16 x float> [[S]]
608+
;
609+
%m = or <16 x i32> %a1, <i32 0, i32 16, i32 -16, i32 32, i32 -32, i32 64, i32 -64, i32 128, i32 -128, i32 256, i32 -256, i32 512, i32 -512, i32 1024, i32 -1024, i32 2048>
610+
%s = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> %m)
611+
ret <16 x float> %s
612+
}
613+
548614
declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
549615

550616
define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
@@ -633,6 +699,17 @@ define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passt
633699
ret <8 x i64> %3
634700
}
635701

702+
; Demanded-bits coverage for avx512.permvar.di.512: the index operand is %a1
; or'ed with constants whose low 3 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; Presumably only the low 3 bits of each index select one of the 8 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <8 x i64> @demandedbit_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %a1) {
703+
; CHECK-LABEL: @demandedbit_test_permvar_di_512_mask(
704+
; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[A1:%.*]], <i64 0, i64 8, i64 -8, i64 16, i64 -16, i64 32, i64 -32, i64 64>
705+
; CHECK-NEXT: [[S:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[A0:%.*]], <8 x i64> [[M]])
706+
; CHECK-NEXT: ret <8 x i64> [[S]]
707+
;
708+
%m = or <8 x i64> %a1, <i64 0, i64 8, i64 -8, i64 16, i64 -16, i64 32, i64 -32, i64 64>
709+
%s = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> %m)
710+
ret <8 x i64> %s
711+
}
712+
636713
declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
637714

638715
define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
@@ -721,6 +798,17 @@ define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x doubl
721798
ret <8 x double> %3
722799
}
723800

801+
; Demanded-bits coverage for avx512.permvar.df.512: the <8 x i64> index operand
; is %a1 or'ed with constants whose low 3 bits are all zero. The expected output
; below keeps both the or and the intrinsic call unchanged.
; Presumably only the low 3 bits of each index select one of the 8 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <8 x double> @demandedbit_test_permvar_df_512_mask(<8 x double> %a0, <8 x i64> %a1) {
802+
; CHECK-LABEL: @demandedbit_test_permvar_df_512_mask(
803+
; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[A1:%.*]], <i64 0, i64 8, i64 -8, i64 16, i64 -16, i64 32, i64 -32, i64 64>
804+
; CHECK-NEXT: [[S:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[A0:%.*]], <8 x i64> [[M]])
805+
; CHECK-NEXT: ret <8 x double> [[S]]
806+
;
807+
%m = or <8 x i64> %a1, <i64 0, i64 8, i64 -8, i64 16, i64 -16, i64 32, i64 -32, i64 64>
808+
%s = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> %m)
809+
ret <8 x double> %s
810+
}
811+
724812
declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
725813

726814
define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
@@ -809,6 +897,17 @@ define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passt
809897
ret <8 x i16> %3
810898
}
811899

900+
; Demanded-bits coverage for avx512.permvar.hi.128: the index operand is %a1
; or'ed with constants whose low 3 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; Presumably only the low 3 bits of each index select one of the 8 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <8 x i16> @demandedbit_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %a1) {
901+
; CHECK-LABEL: @demandedbit_test_permvar_hi_128_mask(
902+
; CHECK-NEXT: [[M:%.*]] = or <8 x i16> [[A1:%.*]], <i16 0, i16 8, i16 -8, i16 16, i16 -16, i16 32, i16 -32, i16 64>
903+
; CHECK-NEXT: [[S:%.*]] = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> [[A0:%.*]], <8 x i16> [[M]])
904+
; CHECK-NEXT: ret <8 x i16> [[S]]
905+
;
906+
%m = or <8 x i16> %a1, <i16 0, i16 8, i16 -8, i16 16, i16 -16, i16 32, i16 -32, i16 64>
907+
%s = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> %m)
908+
ret <8 x i16> %s
909+
}
910+
812911
declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)
813912

814913
define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
@@ -897,6 +996,17 @@ define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %pa
897996
ret <16 x i16> %3
898997
}
899998

999+
; Demanded-bits coverage for avx512.permvar.hi.256: the index operand is %a1
; or'ed with constants whose low 4 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; Presumably only the low 4 bits of each index select one of the 16 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <16 x i16> @demandedbit_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %a1) {
1000+
; CHECK-LABEL: @demandedbit_test_permvar_hi_256_mask(
1001+
; CHECK-NEXT: [[M:%.*]] = or <16 x i16> [[A1:%.*]], <i16 0, i16 16, i16 -16, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048>
1002+
; CHECK-NEXT: [[S:%.*]] = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> [[A0:%.*]], <16 x i16> [[M]])
1003+
; CHECK-NEXT: ret <16 x i16> [[S]]
1004+
;
1005+
%m = or <16 x i16> %a1, <i16 0, i16 16, i16 -16, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048>
1006+
%s = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> %m)
1007+
ret <16 x i16> %s
1008+
}
1009+
9001010
declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
9011011

9021012
define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
@@ -985,6 +1095,17 @@ define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %pa
9851095
ret <32 x i16> %3
9861096
}
9871097

1098+
; Demanded-bits coverage for avx512.permvar.hi.512: the index operand is %a1
; or'ed with constants whose low 5 bits are all zero (the same 16-constant
; sequence written twice). The expected output below keeps both the or and the
; intrinsic call unchanged.
; Presumably only the low 5 bits of each index select one of the 32 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <32 x i16> @demandedbit_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %a1) {
1099+
; CHECK-LABEL: @demandedbit_test_permvar_hi_512_mask(
1100+
; CHECK-NEXT: [[M:%.*]] = or <32 x i16> [[A1:%.*]], <i16 0, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048, i16 -2048, i16 4096, i16 0, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048, i16 -2048, i16 4096>
1101+
; CHECK-NEXT: [[S:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[A0:%.*]], <32 x i16> [[M]])
1102+
; CHECK-NEXT: ret <32 x i16> [[S]]
1103+
;
1104+
%m = or <32 x i16> %a1, <i16 0, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048, i16 -2048, i16 4096, i16 0, i16 32, i16 -32, i16 64, i16 -64, i16 128, i16 -128, i16 256, i16 -256, i16 512, i16 -512, i16 1024, i16 -1024, i16 2048, i16 -2048, i16 4096>
1105+
%s = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> %m)
1106+
ret <32 x i16> %s
1107+
}
1108+
9881109
declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)
9891110

9901111
define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
@@ -1073,6 +1194,17 @@ define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passt
10731194
ret <16 x i8> %3
10741195
}
10751196

1197+
; Demanded-bits coverage for avx512.permvar.qi.128: the index operand is %a1
; or'ed with constants whose low 4 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; The function name now says qi_128 to match the intrinsic under test and the
; sibling qi_128 tests (it previously read qi_129).
; i8 128 and i8 -128 are the same 8-bit pattern; the expected output prints
; both as -128.
; Presumably only the low 4 bits of each index select one of the 16 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <16 x i8> @demandedbit_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %a1) {
1198+
; CHECK-LABEL: @demandedbit_test_permvar_qi_128_mask(
1199+
; CHECK-NEXT: [[M:%.*]] = or <16 x i8> [[A1:%.*]], <i8 0, i8 16, i8 -16, i8 32, i8 -32, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 16, i8 -16, i8 32, i8 -32, i8 64, i8 -64>
1200+
; CHECK-NEXT: [[S:%.*]] = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> [[A0:%.*]], <16 x i8> [[M]])
1201+
; CHECK-NEXT: ret <16 x i8> [[S]]
1202+
;
1203+
%m = or <16 x i8> %a1, <i8 0, i8 16, i8 -16, i8 32, i8 -32, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 16, i8 -16, i8 32, i8 -32, i8 64, i8 -64>
1204+
%s = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> %m)
1205+
ret <16 x i8> %s
1206+
}
1207+
10761208
declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)
10771209

10781210
define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
@@ -1161,6 +1293,17 @@ define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passt
11611293
ret <32 x i8> %3
11621294
}
11631295

1296+
; Demanded-bits coverage for avx512.permvar.qi.256: the index operand is %a1
; or'ed with constants whose low 5 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; The constants are the eight i8 values with zero low 5 bits (every combination
; of bits 5-7), repeated four times. Earlier revisions used literals such as
; i8 256 .. i8 4096 that do not fit in i8 and showed up as 0 in the expected
; output, so most lanes only tested "or x, 0".
; Presumably only the low 5 bits of each index select one of the 32 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <32 x i8> @demandedbit_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %a1) {
1297+
; CHECK-LABEL: @demandedbit_test_permvar_qi_256_mask(
1298+
; CHECK-NEXT: [[M:%.*]] = or <32 x i8> [[A1:%.*]], <i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 96, i8 -96, i8 -128, i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 96, i8 -96, i8 -128, i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 96, i8 -96, i8 -128, i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 96, i8 -96, i8 -128>
1299+
; CHECK-NEXT: [[S:%.*]] = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> [[A0:%.*]], <32 x i8> [[M]])
1300+
; CHECK-NEXT: ret <32 x i8> [[S]]
1301+
;
1302+
%m = or <32 x i8> %a1, <i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 96, i8 -96, i8 -128, i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 96, i8 -96, i8 -128, i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 96, i8 -96, i8 -128, i8 0, i8 32, i8 -32, i8 64, i8 -64, i8 96, i8 -96, i8 -128>
1303+
%s = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> %m)
1304+
ret <32 x i8> %s
1305+
}
1306+
11641307
declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)
11651308

11661309
define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
@@ -1248,3 +1391,14 @@ define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passt
12481391
%3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
12491392
ret <64 x i8> %3
12501393
}
1394+
1395+
; Demanded-bits coverage for avx512.permvar.qi.512: the index operand is %a1
; or'ed with constants whose low 6 bits are all zero. The expected output below
; keeps both the or and the intrinsic call unchanged.
; i8 128 and i8 -128 are the same 8-bit pattern; the expected output prints
; both as -128.
; Presumably only the low 6 bits of each index select one of the 64 lanes, which
; would make the or redundant - to be confirmed against the instruction docs.
define <64 x i8> @demandedbit_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %a1) {
1396+
; CHECK-LABEL: @demandedbit_test_permvar_qi_512_mask(
1397+
; CHECK-NEXT: [[M:%.*]] = or <64 x i8> [[A1:%.*]], <i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128, i8 -128, i8 0, i8 64, i8 -64, i8 -128>
1398+
; CHECK-NEXT: [[S:%.*]] = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> [[A0:%.*]], <64 x i8> [[M]])
1399+
; CHECK-NEXT: ret <64 x i8> [[S]]
1400+
;
1401+
%m = or <64 x i8> %a1, <i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128, i8 -128, i8 0, i8 64, i8 -64, i8 128>
1402+
%s = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> %m)
1403+
ret <64 x i8> %s
1404+
}

0 commit comments

Comments
 (0)