Skip to content

Commit f857ed6

Browse files
committed
[X86] computeKnownBitsForTargetNode - add handling for (V)PMADDWD nodes
1 parent b381d1e commit f857ed6

File tree

2 files changed

+56
-17
lines changed

2 files changed

+56
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37082,6 +37082,33 @@ static void computeKnownBitsForPSADBW(SDValue LHS, SDValue RHS,
3708237082
Known = Known.zext(64);
3708337083
}
3708437084

37085+
/// Compute the known bits of a (V)PMADDWD result from its two vXi16 operands.
///
/// Each i32 result element is the sum of two products: the even ("Lo") and
/// odd ("Hi") source elements of a pair are sign-extended to i32, multiplied
/// element-wise between \p LHS and \p RHS, and the two products are added.
///
/// \param LHS          First multiplicand vector.
/// \param RHS          Second multiplicand vector (same type as \p LHS).
/// \param Known        [out] Known bits of the demanded i32 result elements.
/// \param DemandedElts Mask of demanded result elements.
/// \param DAG          SelectionDAG used to query operand known bits.
/// \param Depth        Current recursion depth of the known-bits query.
static void computeKnownBitsForPMADDWD(SDValue LHS, SDValue RHS,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth) {
  unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();

  // Multiply signed i16 elements to create i32 values and add Lo/Hi pairs.
  // Rescale the demanded result mask to the source element count, then split
  // it into the even (Lo) and odd (Hi) source lanes of each pair.
  APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
  APInt DemandedLoElts =
      DemandedSrcElts & APInt::getSplat(NumSrcElts, APInt(2, 0b01));
  APInt DemandedHiElts =
      DemandedSrcElts & APInt::getSplat(NumSrcElts, APInt(2, 0b10));
  KnownBits LHSLo =
      DAG.computeKnownBits(LHS, DemandedLoElts, Depth + 1).sext(32);
  KnownBits LHSHi =
      DAG.computeKnownBits(LHS, DemandedHiElts, Depth + 1).sext(32);
  KnownBits RHSLo =
      DAG.computeKnownBits(RHS, DemandedLoElts, Depth + 1).sext(32);
  KnownBits RHSHi =
      DAG.computeKnownBits(RHS, DemandedHiElts, Depth + 1).sext(32);
  KnownBits Lo = KnownBits::mul(LHSLo, RHSLo);
  KnownBits Hi = KnownBits::mul(LHSHi, RHSHi);
  // The pairwise add must NOT be treated as no-signed-wrap: if all four i16
  // inputs of a pair are 0x8000 (-32768), each product is 2^30 and the sum
  // 2^31 wraps to 0x80000000 in i32. Claiming NSW would let the add infer
  // sign bits that are wrong for that input.
  Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                      /*NUW=*/false, Lo, Hi);
}
37111+
3708537112
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3708637113
KnownBits &Known,
3708737114
const APInt &DemandedElts,
@@ -37257,6 +37284,16 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3725737284
}
3725837285
break;
3725937286
}
37287+
case X86ISD::VPMADDWD: {
37288+
SDValue LHS = Op.getOperand(0);
37289+
SDValue RHS = Op.getOperand(1);
37290+
assert(VT.getVectorElementType() == MVT::i32 &&
37291+
LHS.getValueType() == RHS.getValueType() &&
37292+
LHS.getValueType().getVectorElementType() == MVT::i16 &&
37293+
"Unexpected PMADDWD types");
37294+
computeKnownBitsForPMADDWD(LHS, RHS, Known, DemandedElts, DAG, Depth);
37295+
break;
37296+
}
3726037297
case X86ISD::PMULUDQ: {
3726137298
KnownBits Known2;
3726237299
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -37393,6 +37430,18 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3739337430
}
3739437431
case ISD::INTRINSIC_WO_CHAIN: {
3739537432
switch (Op->getConstantOperandVal(0)) {
37433+
case Intrinsic::x86_sse2_pmadd_wd:
37434+
case Intrinsic::x86_avx2_pmadd_wd:
37435+
case Intrinsic::x86_avx512_pmaddw_d_512: {
37436+
SDValue LHS = Op.getOperand(1);
37437+
SDValue RHS = Op.getOperand(2);
37438+
assert(VT.getScalarType() == MVT::i32 &&
37439+
LHS.getValueType() == RHS.getValueType() &&
37440+
LHS.getValueType().getScalarType() == MVT::i16 &&
37441+
"Unexpected PMADDWD types");
37442+
computeKnownBitsForPMADDWD(LHS, RHS, Known, DemandedElts, DAG, Depth);
37443+
break;
37444+
}
3739637445
case Intrinsic::x86_sse2_psad_bw:
3739737446
case Intrinsic::x86_avx2_psad_bw:
3739837447
case Intrinsic::x86_avx512_psad_bw_512: {

llvm/test/CodeGen/X86/combine-pmadd.ll

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
4-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
55

66
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
77
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
@@ -34,21 +34,11 @@ define <4 x i32> @combine_pmaddwd_zero_commute(<8 x i16> %a0, <8 x i16> %a1) {
3434
ret <4 x i32> %1
3535
}
3636

37-
; TODO: pmaddwd knownbits handling
3837
define i32 @combine_pmaddwd_constant() {
39-
; SSE-LABEL: combine_pmaddwd_constant:
40-
; SSE: # %bb.0:
41-
; SSE-NEXT: pmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
42-
; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
43-
; SSE-NEXT: pextrd $2, %xmm0, %eax
44-
; SSE-NEXT: retq
45-
;
46-
; AVX-LABEL: combine_pmaddwd_constant:
47-
; AVX: # %bb.0:
48-
; AVX-NEXT: vpmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
49-
; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
50-
; AVX-NEXT: vpextrd $2, %xmm0, %eax
51-
; AVX-NEXT: retq
38+
; CHECK-LABEL: combine_pmaddwd_constant:
39+
; CHECK: # %bb.0:
40+
; CHECK-NEXT: movl $-155, %eax
41+
; CHECK-NEXT: retq
5242
%1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> <i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8>, <8 x i16> <i16 -5, i16 7, i16 -9, i16 -11, i16 13, i16 -15, i16 17, i16 -19>)
5343
%2 = extractelement <4 x i32> %1, i32 2 ; (-5*13)+(6*-15) = -155
5444
ret i32 %2

0 commit comments

Comments
 (0)