Skip to content

Commit c47f3e8

Browse files
committed
[X86] combineSelect - Fold select(pcmpeq(and(X,Pow2),0),A,B) -> select(pcmpeq(and(X,Pow2),Pow2),B,A)
Matches what we already do in LowerVSETCC to reuse an existing constant. Fixes #110875.
1 parent fa3258e commit c47f3e8

File tree

2 files changed

+57
-45
lines changed

2 files changed

+57
-45
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46915,6 +46915,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
4691546915
return DAG.getNode(N->getOpcode(), DL, VT,
4691646916
DAG.getBitcast(CondVT, CondNot), RHS, LHS);
4691746917

46918+
// select(pcmpeq(and(X,Pow2),0),A,B) -> select(pcmpeq(and(X,Pow2),Pow2),B,A)
46919+
if (Cond.getOpcode() == X86ISD::PCMPEQ &&
46920+
Cond.getOperand(0).getOpcode() == ISD::AND &&
46921+
ISD::isBuildVectorAllZeros(Cond.getOperand(1).getNode()) &&
46922+
isConstantPowerOf2(Cond.getOperand(0).getOperand(1),
46923+
Cond.getScalarValueSizeInBits(),
46924+
/*AllowUndefs=*/true) &&
46925+
Cond.hasOneUse()) {
46926+
Cond = DAG.getNode(X86ISD::PCMPEQ, DL, CondVT, Cond.getOperand(0),
46927+
Cond.getOperand(0).getOperand(1));
46928+
return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
46929+
}
46930+
4691846931
// pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the
4691946932
// signbit.
4692046933
if (Cond.getOpcode() == X86ISD::PCMPGT &&

llvm/test/CodeGen/X86/vselect-pcmp.ll

Lines changed: 44 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -643,18 +643,18 @@ define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
643643
;
644644
; AVX512F-LABEL: blend_splat1_mask_cond_v16i8:
645645
; AVX512F: # %bb.0:
646-
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
647-
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
646+
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
647+
; AVX512F-NEXT: vpand %xmm3, %xmm0, %xmm0
648648
; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
649-
; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
649+
; AVX512F-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
650650
; AVX512F-NEXT: retq
651651
;
652652
; AVX512VL-LABEL: blend_splat1_mask_cond_v16i8:
653653
; AVX512VL: # %bb.0:
654-
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
655-
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
654+
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
655+
; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
656656
; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
657-
; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
657+
; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1))
658658
; AVX512VL-NEXT: retq
659659
;
660660
; XOP-LABEL: blend_splat1_mask_cond_v16i8:
@@ -795,18 +795,18 @@ define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32
795795
;
796796
; AVX512F-LABEL: blend_splatmax_mask_cond_v32i8:
797797
; AVX512F: # %bb.0:
798-
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
799-
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
798+
; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
799+
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
800800
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
801-
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
801+
; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
802802
; AVX512F-NEXT: retq
803803
;
804804
; AVX512VL-LABEL: blend_splatmax_mask_cond_v32i8:
805805
; AVX512VL: # %bb.0:
806-
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
807-
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
806+
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
807+
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
808808
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
809-
; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
809+
; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & (ymm2 ^ ymm1))
810810
; AVX512VL-NEXT: retq
811811
;
812812
; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
@@ -972,18 +972,18 @@ define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
972972
;
973973
; AVX512F-LABEL: blend_splat_mask_cond_v16i8:
974974
; AVX512F: # %bb.0:
975-
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
976-
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
975+
; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
976+
; AVX512F-NEXT: vpand %xmm3, %xmm0, %xmm0
977977
; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
978-
; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
978+
; AVX512F-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
979979
; AVX512F-NEXT: retq
980980
;
981981
; AVX512VL-LABEL: blend_splat_mask_cond_v16i8:
982982
; AVX512VL: # %bb.0:
983-
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
984-
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
983+
; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
984+
; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
985985
; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
986-
; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
986+
; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1))
987987
; AVX512VL-NEXT: retq
988988
;
989989
; XOP-LABEL: blend_splat_mask_cond_v16i8:
@@ -1002,10 +1002,10 @@ define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x
10021002
define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
10031003
; AVX1-LABEL: blend_mask_cond_v2i64:
10041004
; AVX1: # %bb.0:
1005-
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1006-
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1005+
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm3 = [1,4]
1006+
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
10071007
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
1008-
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
1008+
; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
10091009
; AVX1-NEXT: retq
10101010
;
10111011
; AVX2-LABEL: blend_mask_cond_v2i64:
@@ -1126,26 +1126,26 @@ define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z
11261126
define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
11271127
; AVX12-LABEL: blend_mask_cond_v16i8:
11281128
; AVX12: # %bb.0:
1129-
; AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1130-
; AVX12-NEXT: vpxor %xmm3, %xmm3, %xmm3
1129+
; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2]
1130+
; AVX12-NEXT: vpand %xmm3, %xmm0, %xmm0
11311131
; AVX12-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
1132-
; AVX12-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
1132+
; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
11331133
; AVX12-NEXT: retq
11341134
;
11351135
; AVX512F-LABEL: blend_mask_cond_v16i8:
11361136
; AVX512F: # %bb.0:
1137-
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1138-
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
1137+
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2]
1138+
; AVX512F-NEXT: vpand %xmm3, %xmm0, %xmm0
11391139
; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
1140-
; AVX512F-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
1140+
; AVX512F-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
11411141
; AVX512F-NEXT: retq
11421142
;
11431143
; AVX512VL-LABEL: blend_mask_cond_v16i8:
11441144
; AVX512VL: # %bb.0:
1145-
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1146-
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
1145+
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2]
1146+
; AVX512VL-NEXT: vpand %xmm3, %xmm0, %xmm0
11471147
; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0
1148-
; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm2 ^ (xmm0 & (xmm1 ^ xmm2))
1148+
; AVX512VL-NEXT: vpternlogq {{.*#+}} xmm0 = xmm1 ^ (xmm0 & (xmm2 ^ xmm1))
11491149
; AVX512VL-NEXT: retq
11501150
;
11511151
; XOP-LABEL: blend_mask_cond_v16i8:
@@ -1326,26 +1326,26 @@ define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z
13261326
;
13271327
; AVX2-LABEL: blend_mask_cond_v32i8:
13281328
; AVX2: # %bb.0:
1329-
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1330-
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
1329+
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16]
1330+
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
13311331
; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
1332-
; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1332+
; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
13331333
; AVX2-NEXT: retq
13341334
;
13351335
; AVX512F-LABEL: blend_mask_cond_v32i8:
13361336
; AVX512F: # %bb.0:
1337-
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1338-
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
1337+
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16]
1338+
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
13391339
; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
1340-
; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1340+
; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
13411341
; AVX512F-NEXT: retq
13421342
;
13431343
; AVX512VL-LABEL: blend_mask_cond_v32i8:
13441344
; AVX512VL: # %bb.0:
1345-
; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1346-
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
1345+
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,4,4,4,4,2,2,2,2,1,2,4,8,16,32,64,128,4,4,4,4,128,4,2,16]
1346+
; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
13471347
; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
1348-
; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm2 ^ (ymm0 & (ymm1 ^ ymm2))
1348+
; AVX512VL-NEXT: vpternlogq {{.*#+}} ymm0 = ymm1 ^ (ymm0 & (ymm2 ^ ymm1))
13491349
; AVX512VL-NEXT: retq
13501350
;
13511351
; XOP-LABEL: blend_mask_cond_v32i8:
@@ -1736,17 +1736,16 @@ define <64 x i8> @PR110875(<32 x i8> %a0, <32 x i8> %a1, i64 %a2) {
17361736
; AVX2: # %bb.0:
17371737
; AVX2-NEXT: vmovq %rdi, %xmm2
17381738
; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2
1739-
; AVX2-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
1740-
; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
1739+
; AVX2-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
1740+
; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
17411741
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
17421742
; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
17431743
; AVX2-NEXT: vpand %ymm4, %ymm3, %ymm3
1744-
; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4
17451744
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm3
1745+
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm5 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
1746+
; AVX2-NEXT: vpblendvb %ymm3, %ymm5, %ymm0, %ymm0
17461747
; AVX2-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm2
1747-
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm4 = [20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20]
1748-
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
1749-
; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1
1748+
; AVX2-NEXT: vpblendvb %ymm2, %ymm5, %ymm1, %ymm1
17501749
; AVX2-NEXT: retq
17511750
;
17521751
; AVX512F-LABEL: PR110875:

0 commit comments

Comments
 (0)