Skip to content

Commit cf62047

Browse files
committed
[X86][SSE] Call SimplifyMultipleUseDemandedBits on PACKSS/PACKUS arguments.
This mainly helps to replace unused arguments with UNDEF in the case where they have multiple users. llvm-svn: 368026
1 parent 87951c8 commit cf62047

File tree

7 files changed

+36
-16
lines changed

7 files changed

+36
-16
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34129,16 +34129,36 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3412934129
}
3413034130
case X86ISD::PACKSS:
3413134131
case X86ISD::PACKUS: {
34132+
SDValue N0 = Op.getOperand(0);
34133+
SDValue N1 = Op.getOperand(1);
34134+
3413234135
APInt DemandedLHS, DemandedRHS;
3413334136
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
3413434137

3413534138
APInt SrcUndef, SrcZero;
34136-
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, SrcUndef,
34137-
SrcZero, TLO, Depth + 1))
34139+
if (SimplifyDemandedVectorElts(N0, DemandedLHS, SrcUndef, SrcZero, TLO,
34140+
Depth + 1))
3413834141
return true;
34139-
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, SrcUndef,
34140-
SrcZero, TLO, Depth + 1))
34142+
if (SimplifyDemandedVectorElts(N1, DemandedRHS, SrcUndef, SrcZero, TLO,
34143+
Depth + 1))
3414134144
return true;
34145+
34146+
// Aggressively peek through ops to get at the demanded elts.
34147+
// TODO - we should do this for all target/faux shuffles ops.
34148+
if (!DemandedElts.isAllOnesValue()) {
34149+
APInt DemandedSrcBits =
34150+
APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
34151+
SDValue NewN0 = SimplifyMultipleUseDemandedBits(
34152+
N0, DemandedSrcBits, DemandedLHS, TLO.DAG, Depth + 1);
34153+
SDValue NewN1 = SimplifyMultipleUseDemandedBits(
34154+
N1, DemandedSrcBits, DemandedRHS, TLO.DAG, Depth + 1);
34155+
if (NewN0 || NewN1) {
34156+
NewN0 = NewN0 ? NewN0 : N0;
34157+
NewN1 = NewN1 ? NewN1 : N1;
34158+
return TLO.CombineTo(Op,
34159+
TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));
34160+
}
34161+
}
3414234162
break;
3414334163
}
3414434164
case X86ISD::HADD:

llvm/test/CodeGen/X86/vector-trunc-packus-widen.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1982,7 +1982,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
19821982
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
19831983
; SSE41-NEXT: packusdw %xmm3, %xmm1
19841984
; SSE41-NEXT: packusdw %xmm1, %xmm4
1985-
; SSE41-NEXT: packuswb %xmm4, %xmm4
1985+
; SSE41-NEXT: packuswb %xmm0, %xmm4
19861986
; SSE41-NEXT: movq %xmm4, (%rdi)
19871987
; SSE41-NEXT: retq
19881988
;

llvm/test/CodeGen/X86/vector-trunc-packus.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1982,7 +1982,7 @@ define void @trunc_packus_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
19821982
; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm1
19831983
; SSE41-NEXT: packusdw %xmm3, %xmm1
19841984
; SSE41-NEXT: packusdw %xmm1, %xmm4
1985-
; SSE41-NEXT: packuswb %xmm4, %xmm4
1985+
; SSE41-NEXT: packuswb %xmm0, %xmm4
19861986
; SSE41-NEXT: movq %xmm4, (%rdi)
19871987
; SSE41-NEXT: retq
19881988
;

llvm/test/CodeGen/X86/vector-trunc-ssat-widen.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1927,7 +1927,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
19271927
; SSE41-NEXT: andpd %xmm0, %xmm2
19281928
; SSE41-NEXT: packusdw %xmm7, %xmm2
19291929
; SSE41-NEXT: packusdw %xmm3, %xmm2
1930-
; SSE41-NEXT: packuswb %xmm2, %xmm2
1930+
; SSE41-NEXT: packuswb %xmm0, %xmm2
19311931
; SSE41-NEXT: movq %xmm2, (%rdi)
19321932
; SSE41-NEXT: retq
19331933
;

llvm/test/CodeGen/X86/vector-trunc-ssat.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1927,7 +1927,7 @@ define void @trunc_ssat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
19271927
; SSE41-NEXT: andpd %xmm0, %xmm2
19281928
; SSE41-NEXT: packusdw %xmm7, %xmm2
19291929
; SSE41-NEXT: packusdw %xmm3, %xmm2
1930-
; SSE41-NEXT: packuswb %xmm2, %xmm2
1930+
; SSE41-NEXT: packuswb %xmm0, %xmm2
19311931
; SSE41-NEXT: movq %xmm2, (%rdi)
19321932
; SSE41-NEXT: retq
19331933
;

llvm/test/CodeGen/X86/vector-trunc-usat-widen.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1247,7 +1247,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
12471247
; SSE2-NEXT: por %xmm2, %xmm0
12481248
; SSE2-NEXT: packuswb %xmm4, %xmm0
12491249
; SSE2-NEXT: packuswb %xmm0, %xmm1
1250-
; SSE2-NEXT: packuswb %xmm1, %xmm1
1250+
; SSE2-NEXT: packuswb %xmm0, %xmm1
12511251
; SSE2-NEXT: movq %xmm1, (%rdi)
12521252
; SSE2-NEXT: retq
12531253
;
@@ -1310,7 +1310,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
13101310
; SSSE3-NEXT: por %xmm2, %xmm0
13111311
; SSSE3-NEXT: packuswb %xmm4, %xmm0
13121312
; SSSE3-NEXT: packuswb %xmm0, %xmm1
1313-
; SSSE3-NEXT: packuswb %xmm1, %xmm1
1313+
; SSSE3-NEXT: packuswb %xmm0, %xmm1
13141314
; SSSE3-NEXT: movq %xmm1, (%rdi)
13151315
; SSSE3-NEXT: retq
13161316
;
@@ -1364,7 +1364,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
13641364
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
13651365
; SSE41-NEXT: packusdw %xmm5, %xmm9
13661366
; SSE41-NEXT: packusdw %xmm9, %xmm1
1367-
; SSE41-NEXT: packuswb %xmm1, %xmm1
1367+
; SSE41-NEXT: packuswb %xmm0, %xmm1
13681368
; SSE41-NEXT: movq %xmm1, (%rdi)
13691369
; SSE41-NEXT: retq
13701370
;
@@ -1998,7 +1998,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
19981998
; SSE2-NEXT: pandn %xmm2, %xmm5
19991999
; SSE2-NEXT: por %xmm0, %xmm5
20002000
; SSE2-NEXT: packuswb %xmm6, %xmm5
2001-
; SSE2-NEXT: packuswb %xmm5, %xmm5
2001+
; SSE2-NEXT: packuswb %xmm0, %xmm5
20022002
; SSE2-NEXT: movq %xmm5, (%rdi)
20032003
; SSE2-NEXT: retq
20042004
;

llvm/test/CodeGen/X86/vector-trunc-usat.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1247,7 +1247,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
12471247
; SSE2-NEXT: por %xmm2, %xmm0
12481248
; SSE2-NEXT: packuswb %xmm4, %xmm0
12491249
; SSE2-NEXT: packuswb %xmm0, %xmm1
1250-
; SSE2-NEXT: packuswb %xmm1, %xmm1
1250+
; SSE2-NEXT: packuswb %xmm0, %xmm1
12511251
; SSE2-NEXT: movq %xmm1, (%rdi)
12521252
; SSE2-NEXT: retq
12531253
;
@@ -1310,7 +1310,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
13101310
; SSSE3-NEXT: por %xmm2, %xmm0
13111311
; SSSE3-NEXT: packuswb %xmm4, %xmm0
13121312
; SSSE3-NEXT: packuswb %xmm0, %xmm1
1313-
; SSSE3-NEXT: packuswb %xmm1, %xmm1
1313+
; SSSE3-NEXT: packuswb %xmm0, %xmm1
13141314
; SSSE3-NEXT: movq %xmm1, (%rdi)
13151315
; SSSE3-NEXT: retq
13161316
;
@@ -1364,7 +1364,7 @@ define void @trunc_usat_v8i64_v8i8_store(<8 x i64> %a0, <8 x i8> *%p1) {
13641364
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm9
13651365
; SSE41-NEXT: packusdw %xmm5, %xmm9
13661366
; SSE41-NEXT: packusdw %xmm9, %xmm1
1367-
; SSE41-NEXT: packuswb %xmm1, %xmm1
1367+
; SSE41-NEXT: packuswb %xmm0, %xmm1
13681368
; SSE41-NEXT: movq %xmm1, (%rdi)
13691369
; SSE41-NEXT: retq
13701370
;
@@ -1998,7 +1998,7 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) {
19981998
; SSE2-NEXT: pandn %xmm2, %xmm5
19991999
; SSE2-NEXT: por %xmm0, %xmm5
20002000
; SSE2-NEXT: packuswb %xmm6, %xmm5
2001-
; SSE2-NEXT: packuswb %xmm5, %xmm5
2001+
; SSE2-NEXT: packuswb %xmm0, %xmm5
20022002
; SSE2-NEXT: movq %xmm5, (%rdi)
20032003
; SSE2-NEXT: retq
20042004
;

0 commit comments

Comments
 (0)