
Commit 8b8949c

Undo AMDGPU changes. Will be done in separate PR. Remove FIXME from wrong place.
Signed-off-by: John Lu <[email protected]>
1 parent 8cc932e commit 8b8949c

File tree

3 files changed, 22 insertions(+), 58 deletions(-)

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 0 additions & 3 deletions

@@ -4017,9 +4017,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
       if (const MDNode *MD = LD->getRanges()) {
         ConstantInt *Lower = mdconst::extract<ConstantInt>(MD->getOperand(0));
 
-        // FIXME: If loads are modified (e.g. type legalization)
-        // so that the load type no longer matches the range metadata type, the
-        // range metadata should be updated to match the new load width.
         Known0 = Known0.trunc(Lower->getBitWidth());
         computeKnownBitsFromRangeMetadata(*MD, Known0);
         if (VT.isVector()) {
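Background on the deleted FIXME: computeKnownBits truncates the tracked known bits to the width of the !range metadata before applying it, which is only sound while the load type still matches that width. As a rough illustration of how a value range constrains known bits, here is a standalone C++ sketch; it is not LLVM's KnownBits code, and the helper name is hypothetical.

#include <cassert>
#include <cstdint>

// Hypothetical helper: for a value known to lie in the half-open range
// [Lo, Hi), return how many of its high bits must be zero. This mirrors the
// basic effect of !range metadata on known bits for the simple
// non-wrapping, non-negative case.
static unsigned knownLeadingZeros(uint64_t Lo, uint64_t Hi) {
  assert(Hi > Lo && "expected a non-empty, non-wrapping range");
  uint64_t MaxVal = Hi - 1; // largest value the range allows
  unsigned LeadingZeros = 0;
  for (int Bit = 63; Bit >= 0 && !((MaxVal >> Bit) & 1); --Bit)
    ++LeadingZeros;
  return LeadingZeros;
}

int main() {
  // A load annotated with !range !{i64 0, i64 32} can only produce 0..31,
  // so its top 59 bits are known to be zero.
  assert(knownLeadingZeros(0, 32) == 59);
  return 0;
}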

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 11 additions & 35 deletions

@@ -4084,32 +4084,29 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
     }
   }
 
-  if (VT.getScalarType() != MVT::i64)
+  if (VT != MVT::i64)
     return SDValue();
 
   // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
 
   // On some subtargets, 64-bit shift is a quarter rate instruction. In the
   // common case, splitting this into a move and a 32-bit shift is faster and
   // the same code size.
+  EVT TargetType = VT.getHalfSizedIntegerVT(*DAG.getContext());
+  EVT TargetVecPairType = EVT::getVectorVT(*DAG.getContext(), TargetType, 2);
   KnownBits Known = DAG.computeKnownBits(RHS);
 
-  EVT ElementType = VT.getScalarType();
-  EVT TargetScalarType = ElementType.getHalfSizedIntegerVT(*DAG.getContext());
-  EVT TargetType = (VT.isVector() ? VT.changeVectorElementType(TargetScalarType)
-                                  : TargetScalarType);
-
-  if (Known.getMinValue().getZExtValue() < TargetScalarType.getSizeInBits())
+  if (Known.getMinValue().getZExtValue() < TargetType.getSizeInBits())
     return SDValue();
   SDValue ShiftAmt;
 
   if (CRHS) {
-    ShiftAmt = DAG.getConstant(RHSVal - TargetScalarType.getSizeInBits(), SL,
-                               TargetType);
+    ShiftAmt =
+        DAG.getConstant(RHSVal - TargetType.getSizeInBits(), SL, TargetType);
   } else {
     SDValue truncShiftAmt = DAG.getNode(ISD::TRUNCATE, SL, TargetType, RHS);
     const SDValue ShiftMask =
-        DAG.getConstant(TargetScalarType.getSizeInBits() - 1, SL, TargetType);
+        DAG.getConstant(TargetType.getSizeInBits() - 1, SL, TargetType);
     // This AND instruction will clamp out of bounds shift values.
     // It will also be removed during later instruction selection.
     ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, truncShiftAmt, ShiftMask);
@@ -4119,24 +4116,9 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
   SDValue NewShift =
       DAG.getNode(ISD::SHL, SL, TargetType, Lo, ShiftAmt, N->getFlags());
 
-  const SDValue Zero = DAG.getConstant(0, SL, TargetScalarType);
-  SDValue Vec;
-
-  if (VT.isVector()) {
-    EVT ConcatType = TargetType.getDoubleNumVectorElementsVT(*DAG.getContext());
-    SmallVector<SDValue, 8> Ops;
-    for (unsigned I = 0, E = TargetType.getVectorNumElements(); I != E; ++I) {
-      SDValue Index = DAG.getConstant(I, SL, MVT::i32);
-      Ops.push_back(Zero);
-      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, TargetScalarType,
-                                NewShift, Index);
-      Ops.push_back(Elt);
-    }
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, ConcatType, Ops);
-  } else {
-    EVT ConcatType = EVT::getVectorVT(*DAG.getContext(), TargetType, 2);
-    Vec = DAG.getBuildVector(ConcatType, SL, {Zero, NewShift});
-  }
+  const SDValue Zero = DAG.getConstant(0, SL, TargetType);
+
+  SDValue Vec = DAG.getBuildVector(TargetVecPairType, SL, {Zero, NewShift});
   return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
 }
 
@@ -5200,13 +5182,7 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
     break;
   }
   case ISD::SHL: {
-    // Range metadata can be invalidated when loads are converted to legal types
-    // (e.g. v2i64 -> v4i32).
-    // Try to convert vector shl before type legalization so that range metadata
-    // can be utilized.
-    if (!(N->getValueType(0).isVector() &&
-          DCI.getDAGCombineLevel() == BeforeLegalizeTypes) &&
-        DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+    if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
       break;
 
     return performShlCombine(N, DCI);
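For intuition about the scalar combine kept above: when the shift amount of an i64 shl is known to be at least 32, the low 32 bits of the result are zero and the high 32 bits are a 32-bit shift of the source's low word, which is what the build_pair form expresses. Below is a standalone C++ sketch of that identity under the assumption that the shift amount lies in [32, 64); it is illustrative only, not the SelectionDAG code, and the helper name is hypothetical.

#include <cassert>
#include <cstdint>

// Hypothetical helper: emulate "i64 (shl x, C) -> (build_pair 0, (shl lo(x), C - 32))"
// for shift amounts known to be in [32, 64). The result's low word is 0 and its
// high word is the low word of x shifted left by (Amt - 32).
static uint64_t shlViaBuildPair(uint64_t X, unsigned Amt) {
  assert(Amt >= 32 && Amt < 64 && "sketch only covers the combined case");
  uint32_t Lo = static_cast<uint32_t>(X);  // lo(x)
  uint32_t Hi = Lo << (Amt - 32);          // 32-bit shift replaces the 64-bit one
  return static_cast<uint64_t>(Hi) << 32;  // build_pair(0, Hi), viewed as one i64
}

int main() {
  uint64_t X = 0x123456789abcdef0ull;
  for (unsigned Amt = 32; Amt < 64; ++Amt)
    assert(shlViaBuildPair(X, Amt) == (X << Amt)); // matches a plain 64-bit shl
  return 0;
}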

llvm/test/CodeGen/AMDGPU/shl64_reduce.ll

Lines changed: 11 additions & 20 deletions

@@ -34,12 +34,10 @@ define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-LABEL: shl_v2_metadata:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_load_dwordx4 v[3:6], v[4:5]
+; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_lshlrev_b32_e32 v1, v3, v0
-; CHECK-NEXT:    v_lshlrev_b32_e32 v3, v5, v2
-; CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v4, v[0:1]
+; CHECK-NEXT:    v_lshlrev_b64 v[2:3], v6, v[2:3]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0
   %shl = shl <2 x i64> %arg0, %shift.amt
@@ -50,15 +48,12 @@ define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-LABEL: shl_v3_metadata:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    flat_load_dword v1, v[6:7] offset:16
+; CHECK-NEXT:    flat_load_dword v12, v[6:7] offset:16
 ; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[6:7]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_lshlrev_b32_e32 v5, v1, v4
-; CHECK-NEXT:    v_lshlrev_b32_e32 v1, v8, v0
-; CHECK-NEXT:    v_lshlrev_b32_e32 v3, v10, v2
-; CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0
-; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_lshlrev_b64 v[4:5], v12, v[4:5]
+; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v8, v[0:1]
+; CHECK-NEXT:    v_lshlrev_b64 v[2:3], v10, v[2:3]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0
   %shl = shl <3 x i64> %arg0, %shift.amt
@@ -73,15 +68,11 @@ define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    flat_load_dwordx4 v[13:16], v[8:9] offset:16
 ; CHECK-NEXT:    ; kill: killed $vgpr8 killed $vgpr9
-; CHECK-NEXT:    v_lshlrev_b32_e32 v1, v10, v0
-; CHECK-NEXT:    v_lshlrev_b32_e32 v3, v12, v2
+; CHECK-NEXT:    v_lshlrev_b64 v[0:1], v10, v[0:1]
+; CHECK-NEXT:    v_lshlrev_b64 v[2:3], v12, v[2:3]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_lshlrev_b32_e32 v5, v13, v4
-; CHECK-NEXT:    v_lshlrev_b32_e32 v7, v15, v6
-; CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0
-; CHECK-NEXT:    v_mov_b32_e32 v4, 0
-; CHECK-NEXT:    v_mov_b32_e32 v6, 0
+; CHECK-NEXT:    v_lshlrev_b64 v[4:5], v13, v[4:5]
+; CHECK-NEXT:    v_lshlrev_b64 v[6:7], v15, v[6:7]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0
   %shl = shl <4 x i64> %arg0, %shift.amt
