Skip to content

Commit 8919a21

Browse files
committed
[AMDGPU] Add a lit test for hasAndNot.
1 parent 0e68f59 commit 8919a21

File tree

3 files changed

+122
-4
lines changed

3 files changed

+122
-4
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7029,6 +7029,81 @@ static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
70297029
}
70307030
}
70317031

7032+
// Try to rewrite (and x, (or y, ~z)) into a shape the target's and-not
// (s_andn2) patterns can select on uniform values; otherwise fall back to
// promoting a uniform AND to i32.
//
// \param Op  the ISD::AND node being combined.
// \param DCI combiner state; provides the DAG and legalization phase.
// \returns the replacement node, or an empty SDValue if no combine applies.
SDValue SITargetLowering::combineAnd(SDValue Op,
                                     DAGCombinerInfo &DCI) const {
  assert(Op.getOpcode() == ISD::AND);

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(Op);

  if (hasAndNot(Op)) {
    SDValue LHS = Op->getOperand(0);
    SDValue RHS = Op->getOperand(1);

    // Match (and LHS, (or Y, ~Z)) where ~Z is (xor Z, -1).
    if (RHS.getOpcode() == ISD::OR && RHS.hasOneUse()) {
      SDValue Y = RHS->getOperand(0);
      SDValue NotZ = RHS->getOperand(1);

      if (NotZ.getOpcode() == ISD::XOR &&
          isAllOnesConstant(NotZ->getOperand(1))) {
        SDValue Z = NotZ->getOperand(0);

        // Skip constant Y; constant folding handles that form already.
        if (!isa<ConstantSDNode>(Y)) {
          // Re-express (or Y, ~Z) as ~(and ~Y, Z) (De Morgan), i.e.
          // (and LHS, (or Y, ~Z)) -> (and LHS, ~(and ~Y, Z)), exposing
          // two and-not shapes for instruction selection.
          SDValue NotY = DAG.getNOT(DL, Y, Y.getValueType());
          SDValue AndNotYZ =
              DAG.getNode(ISD::AND, DL, Y.getValueType(), NotY, Z);
          SDValue NotAndNotYZ =
              DAG.getNOT(DL, AndNotYZ, AndNotYZ.getValueType());
          return DAG.getNode(ISD::AND, DL, Op.getValueType(), LHS,
                             NotAndNotYZ);
        }
      }
    }
  }

  // Otherwise promote the AND to i32, mirroring promoteUniformOpToI32.
  // The opcode is known to be ISD::AND here, so the SETCC/SELECT/shift
  // special cases of the generic promotion code are unreachable and have
  // been dropped.
  EVT OpTy = Op.getValueType();
  auto ExtTy = OpTy.changeElementType(MVT::i32);

  if (DCI.isBeforeLegalizeOps() ||
      isNarrowingProfitable(Op.getNode(), ExtTy, OpTy))
    return SDValue();

  const unsigned ExtOp = getExtOpcodeForPromotedOp(Op);
  SDValue LHS = DAG.getNode(ExtOp, DL, ExtTy, {Op->getOperand(0)});
  SDValue RHS = DAG.getNode(ExtOp, DL, ExtTy, {Op->getOperand(1)});

  // The operation was widened to i32, so truncate the result back to the
  // original type.
  SDValue NewVal = DAG.getNode(ISD::AND, DL, ExtTy, {LHS, RHS});
  return DAG.getZExtOrTrunc(NewVal, DL, OpTy);
}
7106+
70327107
SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
70337108
DAGCombinerInfo &DCI) const {
70347109
const unsigned Opc = Op.getOpcode();
@@ -15244,13 +15319,17 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
1524415319

1524515320
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1524615321
DAGCombinerInfo &DCI) const {
15322+
SelectionDAG &DAG = DCI.DAG;
1524715323
switch (N->getOpcode()) {
15324+
case ISD::AND:
15325+
if (auto Res = combineAnd(SDValue(N, 0), DCI))
15326+
return Res;
15327+
break;
1524815328
case ISD::ADD:
1524915329
case ISD::SUB:
1525015330
case ISD::SHL:
1525115331
case ISD::SRL:
1525215332
case ISD::SRA:
15253-
case ISD::AND:
1525415333
case ISD::OR:
1525515334
case ISD::XOR:
1525615335
case ISD::MUL:
@@ -15356,7 +15435,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1535615435
case AMDGPUISD::CLAMP:
1535715436
return performClampCombine(N, DCI);
1535815437
case ISD::SCALAR_TO_VECTOR: {
15359-
SelectionDAG &DAG = DCI.DAG;
1536015438
EVT VT = N->getValueType(0);
1536115439

1536215440
// v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
@@ -17527,8 +17605,8 @@ SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
1752717605
}
1752817606

1752917607
bool SITargetLowering::hasAndNot(SDValue Op) const {
17530-
// Return false if the operation is divergent, as AND-NOT optimization
17531-
// requires uniform behavior across threads.
17608+
// Return false if the operation is divergent, as AND-NOT is a scalar-only
17609+
// instruction.
1753217610
if (Op->isDivergent())
1753317611
return false;
1753417612

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
148148
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
149149
SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
150150
SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
151+
SDValue combineAnd(SDValue Op, DAGCombinerInfo &DCI) const;
151152
SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
152153
SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
153154
SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;

llvm/test/CodeGen/AMDGPU/andornot.ll

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s

; Check that a uniform (and x, (or y, not z)) selects scalar and-not
; (s_andn2) instructions for i16, i32, and i64 operands.

; GCN-LABEL: {{^}}scalar_and_or_not_i16
; GCN: s_not_b32
; GCN-NEXT: s_lshr_b32
; GCN-NEXT: s_and_b32
; GCN-NEXT: s_andn2_b32
define amdgpu_kernel void @scalar_and_or_not_i16(ptr addrspace(1) %out, i16 %x, i16 %y, i16 %z) {
entry:
  %z.not = xor i16 %z, -1
  %y.or.znot = or i16 %y, %z.not
  %masked = and i16 %x, %y.or.znot
  store i16 %masked, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}scalar_and_or_not_i32
; GCN: s_andn2_b32
; GCN-NEXT: s_andn2_b32
define amdgpu_kernel void @scalar_and_or_not_i32(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
entry:
  %z.not = xor i32 %z, -1
  %y.or.znot = or i32 %y, %z.not
  %masked = and i32 %x, %y.or.znot
  store i32 %masked, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}scalar_and_or_not_i64
; GCN: s_andn2_b64
; GCN-NEXT: s_andn2_b64
define amdgpu_kernel void @scalar_and_or_not_i64(ptr addrspace(1) %out, i64 %x, i64 %y, i64 %z) {
entry:
  %z.not = xor i64 %z, -1
  %y.or.znot = or i64 %y, %z.not
  %masked = and i64 %x, %y.or.znot
  store i64 %masked, ptr addrspace(1) %out, align 4
  ret void
}

0 commit comments

Comments
 (0)