Skip to content

Commit 761a963

Browse files
authored
[DAG] narrowExtractedVectorBinOp - ensure we limit late node creation to LegalOperations only (#72130)
Avoids infinite-loop issues in some upcoming patches to help D152928. x86 sees a number of regressions, which are addressed by extending SimplifyDemandedVectorEltsForTargetNode to cover more binop opcodes.
1 parent 4028dd2 commit 761a963

File tree

7 files changed

+43
-28
lines changed

7 files changed

+43
-28
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24076,7 +24076,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
2407624076
// Bail out if the target does not support a narrower version of the binop.
2407724077
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
2407824078
WideNumElts / NarrowingRatio);
24079-
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
24079+
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24080+
LegalOperations))
2408024081
return SDValue();
2408124082

2408224083
// If extraction is cheap, we don't need to look at the binop operands

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41450,6 +41450,18 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4145041450
// Integer ops.
4145141451
case X86ISD::PACKSS:
4145241452
case X86ISD::PACKUS:
41453+
case X86ISD::PCMPEQ:
41454+
case X86ISD::PCMPGT:
41455+
case X86ISD::PMULUDQ:
41456+
case X86ISD::PMULDQ:
41457+
case X86ISD::VSHLV:
41458+
case X86ISD::VSRLV:
41459+
case X86ISD::VSRAV:
41460+
// Float ops.
41461+
case X86ISD::FMAX:
41462+
case X86ISD::FMIN:
41463+
case X86ISD::FMAXC:
41464+
case X86ISD::FMINC:
4145341465
// Horizontal Ops.
4145441466
case X86ISD::HADD:
4145541467
case X86ISD::HSUB:

llvm/test/Analysis/CostModel/AArch64/vector-select.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,10 @@ define <2 x i64> @v2i64_select_sle(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
125125
; CODE: mov
126126
; CODE: mov
127127
; CODE: mov
128-
; CODE: ldr
129-
; CODE: cmge
130128
; CODE: cmge
129+
; CODE: ldr
131130
; CODE: bif
131+
; CODE: cmge
132132
; CODE: bif
133133
; CODE: ext
134134
; CODE: ret

llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,15 +1860,14 @@ define i64 @umaxv_v3i64(<3 x i64> %a) {
18601860
; CHECK-LABEL: umaxv_v3i64:
18611861
; CHECK: // %bb.0: // %entry
18621862
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
1863+
; CHECK-NEXT: mov v3.16b, v2.16b
18631864
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1864-
; CHECK-NEXT: mov v3.16b, v0.16b
1865-
; CHECK-NEXT: mov v4.16b, v2.16b
18661865
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
1867-
; CHECK-NEXT: mov v3.d[1], v1.d[0]
1868-
; CHECK-NEXT: mov v4.d[1], xzr
1869-
; CHECK-NEXT: cmhi v3.2d, v3.2d, v4.2d
1866+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
1867+
; CHECK-NEXT: mov v3.d[1], xzr
1868+
; CHECK-NEXT: cmhi v3.2d, v0.2d, v3.2d
18701869
; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8
1871-
; CHECK-NEXT: bif v0.8b, v2.8b, v3.8b
1870+
; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
18721871
; CHECK-NEXT: and v1.8b, v1.8b, v4.8b
18731872
; CHECK-NEXT: cmhi d2, d0, d1
18741873
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
@@ -1930,4 +1929,4 @@ define i128 @umaxv_v2i128(<2 x i128> %a) {
19301929
entry:
19311930
%arg1 = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
19321931
ret i128 %arg1
1933-
}
1932+
}

llvm/test/CodeGen/X86/avx512-insert-extract.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,11 +1077,10 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) nounwin
10771077
; KNL-LABEL: test_extractelement_v64i1:
10781078
; KNL: ## %bb.0:
10791079
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1080-
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
10811080
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1081+
; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
1082+
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
10821083
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
1083-
; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1
1084-
; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
10851084
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
10861085
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
10871086
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1113,11 +1112,10 @@ define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) nounwind
11131112
; KNL-LABEL: extractelement_v64i1_alt:
11141113
; KNL: ## %bb.0:
11151114
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
1116-
; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
11171115
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
1116+
; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1
1117+
; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
11181118
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
1119-
; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1
1120-
; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
11211119
; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
11221120
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
11231121
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,18 +1068,21 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
10681068
;
10691069
; CHECK-AVX2-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
10701070
; CHECK-AVX2: # %bb.0:
1071-
; CHECK-AVX2-NEXT: subq $40, %rsp
1072-
; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1073-
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
1074-
; CHECK-AVX2-NEXT: vpsllvd %xmm0, %xmm1, %xmm0
1075-
; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1071+
; CHECK-AVX2-NEXT: subq $56, %rsp
1072+
; CHECK-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1073+
; CHECK-AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [2,2,0,0,2,2,0,0]
1074+
; CHECK-AVX2-NEXT: # ymm1 = mem[0,1,0,1]
1075+
; CHECK-AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
1076+
; CHECK-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
10761077
; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax
10771078
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
1079+
; CHECK-AVX2-NEXT: vzeroupper
10781080
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
10791081
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1080-
; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1082+
; CHECK-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
10811083
; CHECK-AVX2-NEXT: vpextrw $0, %xmm0, %eax
10821084
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
1085+
; CHECK-AVX2-NEXT: vzeroupper
10831086
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
10841087
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
10851088
; CHECK-AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -1092,7 +1095,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
10921095
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
10931096
; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
10941097
; CHECK-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1095-
; CHECK-AVX2-NEXT: addq $40, %rsp
1098+
; CHECK-AVX2-NEXT: addq $56, %rsp
10961099
; CHECK-AVX2-NEXT: retq
10971100
;
10981101
; CHECK-NO-FASTFMA-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:

llvm/test/CodeGen/X86/kshift.ll

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -270,10 +270,11 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
270270
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
271271
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
272272
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
273+
; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
273274
; KNL-NEXT: kshiftlw $15, %k0, %k1
274-
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm0
275+
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
276+
; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
275277
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
276-
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
277278
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
278279
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
279280
; KNL-NEXT: kmovw %k0, %eax
@@ -563,13 +564,14 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
563564
; KNL-LABEL: kshiftr_v64i1_63:
564565
; KNL: # %bb.0:
565566
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
566-
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
567567
; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
568-
; KNL-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
568+
; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
569+
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
569570
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
570571
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
571572
; KNL-NEXT: kshiftrw $15, %k0, %k1
572-
; KNL-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
573+
; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0
574+
; KNL-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
573575
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
574576
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
575577
; KNL-NEXT: kmovw %k0, %eax

0 commit comments

Comments
 (0)