Skip to content

Commit bcb5393

Browse files
committed
Use Custom lowering for v2i32 ROTR instead of additional selection patterns. Tidy up performOrCombine().
1 parent 714054b commit bcb5393

File tree

2 files changed

+15
-62
lines changed

2 files changed

+15
-62
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -431,12 +431,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
431431
}
432432

433433
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, MVT::v2i32, Legal);
434-
// Prevent SELECT from being implemented with the above bitwise ops and
435-
// instead use cndmask.
434+
// Prevent SELECT v2i32 from being implemented with the above bitwise ops and
435+
// instead lower to cndmask in SITargetLowering::LowerSELECT().
436436
setOperationAction(ISD::SELECT, MVT::v2i32, Custom);
437437
// Enable MatchRotate to produce ISD::ROTR, which is later transformed to
438438
// alignbit.
439-
setOperationAction(ISD::ROTR, MVT::v2i32, Legal);
439+
setOperationAction(ISD::ROTR, MVT::v2i32, Custom);
440440

441441
setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16, MVT::v4bf16},
442442
Custom);
@@ -12893,11 +12893,6 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
1289312893
if (VT == MVT::v2i32) {
1289412894
if (LHS->getOpcode() == ISD::BUILD_VECTOR &&
1289512895
RHS->getOpcode() == ISD::BUILD_VECTOR) {
12896-
// DAG.canonicalizeCommutativeBinop(ISD::OR, RHS, LHS);
12897-
SDValue BVLHS = LHS->getOperand(0);
12898-
SDValue CLHS = LHS->getOperand(1);
12899-
SDValue CRHS = RHS->getOperand(0);
12900-
SDValue BVRHS = RHS->getOperand(1);
1290112896
LLVM_DEBUG(dbgs() << "### Performing v2i32 SIISelLowering "
1290212897
"DAGCombine::CombineOR\n";);
1290312898

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 12 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2379,18 +2379,18 @@ def : AMDGPUPat <
23792379
let True16Predicate = NotHasTrue16BitInsts in {
23802380
def : ROTRPattern <V_ALIGNBIT_B32_e64>;
23812381

2382-
def : AMDGPUPat <
2383-
(rotr v2i32:$src0, v2i32:$src1),
2384-
(REG_SEQUENCE VReg_64,
2385-
(V_ALIGNBIT_B32_e64
2386-
(i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2387-
(i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2388-
(i32 (EXTRACT_SUBREG VReg_64:$src1, sub0))), sub0,
2389-
(V_ALIGNBIT_B32_e64
2390-
(i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2391-
(i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2392-
(i32 (EXTRACT_SUBREG VReg_64:$src1, sub1))), sub1)
2393-
>;
2382+
// def : AMDGPUPat <
2383+
// (rotr v2i32:$src0, v2i32:$src1),
2384+
// (REG_SEQUENCE VReg_64,
2385+
// (V_ALIGNBIT_B32_e64
2386+
// (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2387+
// (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2388+
// (i32 (EXTRACT_SUBREG VReg_64:$src1, sub0))), sub0,
2389+
// (V_ALIGNBIT_B32_e64
2390+
// (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2391+
// (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2392+
// (i32 (EXTRACT_SUBREG VReg_64:$src1, sub1))), sub1)
2393+
// >;
23942394

23952395
// Prevents a regression in fneg-modifier-casting.ll, together with the modifications to XorCombine(), when v2i32 OR is legal.
23962396
def : AMDGPUPat <
@@ -2404,20 +2404,6 @@ def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
24042404
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
24052405
(V_ALIGNBIT_B32_e64 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
24062406
(i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
2407-
2408-
def : GCNPat <
2409-
(rotr v2i32:$src0, v2i32:$src1),
2410-
(REG_SEQUENCE VReg_64,
2411-
(V_ALIGNBIT_B32_e64
2412-
(i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2413-
(i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2414-
(i32 (EXTRACT_SUBREG VReg_64:$src1, sub0))), sub0,
2415-
(V_ALIGNBIT_B32_e64
2416-
(i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2417-
(i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2418-
(i32 (EXTRACT_SUBREG VReg_64:$src1, sub1))), sub1)
2419-
>;
2420-
24212407
} // end True16Predicate = NotHasTrue16BitInsts
24222408

24232409
let True16Predicate = UseRealTrue16Insts in {
@@ -2436,20 +2422,6 @@ def : GCNPat <
24362422
/* clamp */ 0, /* op_sel */ 0)
24372423
>;
24382424

2439-
def : GCNPat <
2440-
(rotr v2i32:$src0, v2i32:$src1),
2441-
(REG_SEQUENCE VReg_64,
2442-
(V_ALIGNBIT_B32_t16_e64
2443-
0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2444-
0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2445-
0, (EXTRACT_SUBREG (i32 (EXTRACT_SUBREG VReg_64:$src1, sub0)) ,lo16),0,0), sub0,
2446-
(V_ALIGNBIT_B32_t16_e64
2447-
0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2448-
0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2449-
0, (EXTRACT_SUBREG (i32 (EXTRACT_SUBREG VReg_64:$src1, sub0)) ,lo16),0,0), sub1)
2450-
>;
2451-
2452-
24532425
def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
24542426
(V_ALIGNBIT_B32_t16_e64 0, /* src0_modifiers */
24552427
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
@@ -2476,20 +2448,6 @@ def : GCNPat <
24762448
$src1, /* clamp */ 0, /* op_sel */ 0)
24772449
>;
24782450

2479-
def : GCNPat <
2480-
(rotr v2i32:$src0, v2i32:$src1),
2481-
(REG_SEQUENCE VReg_64,
2482-
(V_ALIGNBIT_B32_fake16_e64
2483-
0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2484-
0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub0)),
2485-
0, (i32 (EXTRACT_SUBREG VReg_64:$src1, sub0)),0,0), sub0,
2486-
(V_ALIGNBIT_B32_fake16_e64
2487-
0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2488-
0, (i32 (EXTRACT_SUBREG VReg_64:$src0, sub1)),
2489-
0, (i32 (EXTRACT_SUBREG VReg_64:$src1, sub1)),0,0), sub1)
2490-
>;
2491-
2492-
24932451
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
24942452
(V_ALIGNBIT_B32_fake16_e64 0, /* src0_modifiers */
24952453
(i32 (EXTRACT_SUBREG (i64 $src0), sub1)),

0 commit comments

Comments
 (0)