
Commit 2c9aa28

[AMDGPU] Add a lit test for hasAndNot.
1 parent 1417d71 commit 2c9aa28

File tree

3 files changed: +122 -5 lines changed
  llvm/lib/Target/AMDGPU/SIISelLowering.cpp
  llvm/lib/Target/AMDGPU/SIISelLowering.h
  llvm/test/CodeGen/AMDGPU/andornot.ll

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 82 additions & 5 deletions
@@ -6822,6 +6822,81 @@ static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
   }
 }
 
+SDValue SITargetLowering::combineAnd(SDValue Op,
+                                     DAGCombinerInfo &DCI) const {
+  const unsigned Opc = Op.getOpcode();
+  assert(Opc == ISD::AND);
+
+  auto &DAG = DCI.DAG;
+  SDLoc DL(Op);
+
+  if (hasAndNot(Op)) {
+    SDValue LHS = Op->getOperand(0);
+    SDValue RHS = Op->getOperand(1);
+
+    // (and LHS, (or Y, ~Z))
+    if (RHS.getOpcode() == ISD::OR && RHS.hasOneUse()) {
+      SDValue Y = RHS->getOperand(0);
+      SDValue NotZ = RHS->getOperand(1);
+
+      if (NotZ.getOpcode() == ISD::XOR && isAllOnesConstant(NotZ->getOperand(1))) {
+        SDValue Z = NotZ->getOperand(0);
+
+        if (!isa<ConstantSDNode>(Y)) {
+          SDValue NotY = DAG.getNOT(DL, Y, Y.getValueType());
+          SDValue AndNotYZ = DAG.getNode(ISD::AND, DL, Y.getValueType(), NotY, Z);
+          SDValue NotAndNotYZ = DAG.getNOT(DL, AndNotYZ, AndNotYZ.getValueType());
+          SDValue NewAnd = DAG.getNode(ISD::AND, DL, Op.getValueType(), LHS, NotAndNotYZ);
+          return NewAnd;
+        }
+      }
+    }
+  }
+
+  EVT OpTy = (Opc != ISD::SETCC) ? Op.getValueType()
+                                 : Op->getOperand(0).getValueType();
+  auto ExtTy = OpTy.changeElementType(MVT::i32);
+
+  if (DCI.isBeforeLegalizeOps() ||
+      isNarrowingProfitable(Op.getNode(), ExtTy, OpTy))
+    return SDValue();
+
+  SDValue LHS;
+  SDValue RHS;
+  if (Opc == ISD::SELECT) {
+    LHS = Op->getOperand(1);
+    RHS = Op->getOperand(2);
+  } else {
+    LHS = Op->getOperand(0);
+    RHS = Op->getOperand(1);
+  }
+
+  const unsigned ExtOp = getExtOpcodeForPromotedOp(Op);
+  LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS});
+
+  // Special case: for shifts, the RHS always needs a zext.
+  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
+    RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS});
+  else
+    RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS});
+
+  // setcc always returns i1/i1 vec so no need to truncate after.
+  if (Opc == ISD::SETCC) {
+    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+    return DAG.getSetCC(DL, Op.getValueType(), LHS, RHS, CC);
+  }
+
+  // For other ops, we extend the operation's return type as well so we need to
+  // truncate back to the original type.
+  SDValue NewVal;
+  if (Opc == ISD::SELECT)
+    NewVal = DAG.getNode(ISD::SELECT, DL, ExtTy, {Op->getOperand(0), LHS, RHS});
+  else
+    NewVal = DAG.getNode(Opc, DL, ExtTy, {LHS, RHS});
+
+  return DAG.getZExtOrTrunc(NewVal, DL, OpTy);
+}
+
 SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
                                                 DAGCombinerInfo &DCI) const {
   const unsigned Opc = Op.getOpcode();
@@ -14797,16 +14872,19 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
   return SDValue(CSrc, 0);
 }
 
-
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
   switch (N->getOpcode()) {
+  case ISD::AND:
+    if (auto Res = combineAnd(SDValue(N, 0), DCI))
+      return Res;
+    break;
   case ISD::ADD:
   case ISD::SUB:
   case ISD::SHL:
   case ISD::SRL:
   case ISD::SRA:
-  case ISD::AND:
   case ISD::OR:
   case ISD::XOR:
   case ISD::MUL:
@@ -14910,7 +14988,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
   case AMDGPUISD::CLAMP:
     return performClampCombine(N, DCI);
   case ISD::SCALAR_TO_VECTOR: {
-    SelectionDAG &DAG = DCI.DAG;
     EVT VT = N->getValueType(0);
 
     // v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
@@ -16892,8 +16969,8 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
 }
 
 bool SITargetLowering::hasAndNot(SDValue Op) const {
-  // Return false if the operation is divergent, as AND-NOT optimization
-  // requires uniform behavior across threads.
+  // Return false if the operation is divergent, as AND-NOT is a scalar-only
+  // instruction.
   if (Op->isDivergent())
     return false;
 
llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
@@ -147,6 +147,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue combineAnd(SDValue Op, DAGCombinerInfo &DCI) const;
   SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
   SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/AMDGPU/andornot.ll

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}scalar_and_or_not_i16
+; GCN: s_not_b32
+; GCN-NEXT: s_lshr_b32
+; GCN-NEXT: s_and_b32
+; GCN-NEXT: s_andn2_b32
+define amdgpu_kernel void @scalar_and_or_not_i16(ptr addrspace(1) %out, i16 %x, i16 %y, i16 %z) {
+entry:
+  %not_z = xor i16 %z, -1
+  %or_y_not_z = or i16 %y, %not_z
+  %and_result = and i16 %x, %or_y_not_z
+  store i16 %and_result, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_and_or_not_i32
+; GCN: s_andn2_b32
+; GCN-NEXT: s_andn2_b32
+define amdgpu_kernel void @scalar_and_or_not_i32(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
+entry:
+  %not_z = xor i32 %z, -1
+  %or_y_not_z = or i32 %y, %not_z
+  %and_result = and i32 %x, %or_y_not_z
+  store i32 %and_result, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_and_or_not_i64
+; GCN: s_andn2_b64
+; GCN-NEXT: s_andn2_b64
+define amdgpu_kernel void @scalar_and_or_not_i64(ptr addrspace(1) %out, i64 %x, i64 %y, i64 %z) {
+entry:
+  %not_z = xor i64 %z, -1
+  %or_y_not_z = or i64 %y, %not_z
+  %and_result = and i64 %x, %or_y_not_z
+  store i64 %and_result, ptr addrspace(1) %out, align 4
+  ret void
+}
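The new tests are deliberately scalar: hasAndNot returns false for divergent operations, so the same pattern computed per-lane is not expected to take the new fold. A hypothetical divergent counterpart, shown only for contrast and not part of this commit (no CHECK lines are claimed for it):

; Divergent variant (illustrative): the AND is divergent, so hasAndNot
; returns false and the new fold above does not apply.
define amdgpu_kernel void @divergent_and_or_not_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %z) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid
  %x = load i32, ptr addrspace(1) %in.gep, align 4
  %not_z = xor i32 %z, -1
  %or_y_not_z = or i32 %x, %not_z
  %and_result = and i32 %tid, %or_y_not_z
  %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %tid
  store i32 %and_result, ptr addrspace(1) %out.gep, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()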
