Skip to content

Commit d4a9fb6

Browse files
committed
Modify allUsesHaveSourceMods() instead of foldFreeOpFromSelect()
This prevents any regressions in fneg-modifier-casting.ll.
1 parent dd52635 commit d4a9fb6

File tree

2 files changed

+23
-26
lines changed

2 files changed

+23
-26
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 15 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -721,6 +721,19 @@ static bool selectSupportsSourceMods(const SDNode *N) {
721721
return N->getValueType(0) == MVT::f32;
722722
}
723723

724+
LLVM_READONLY
725+
static bool buildVectorSupportsSourceMods(const SDNode *N) {
726+
if (N->getValueType(0) != MVT::v2f32)
727+
return true;
728+
729+
SDValue LHS = N->getOperand(0);
730+
SDValue RHS = N->getOperand(1);
731+
if (LHS->getOpcode() != ISD::SELECT || RHS->getOpcode() != ISD::SELECT)
732+
return true;
733+
734+
return false;
735+
}
736+
724737
// Most FP instructions support source modifiers, but this could be refined
725738
// slightly.
726739
LLVM_READONLY
@@ -754,6 +767,8 @@ static bool hasSourceMods(const SDNode *N) {
754767
return true;
755768
}
756769
}
770+
case ISD::BUILD_VECTOR:
771+
return buildVectorSupportsSourceMods(N);
757772
case ISD::SELECT:
758773
return selectSupportsSourceMods(N);
759774
default:
@@ -4786,24 +4801,6 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
47864801
if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
47874802
return SDValue();
47884803

4789-
// select c, (fneg (f32 bitcast i32 x)), (fneg (f32 bitcast i32 y)) can be
4790-
// lowered directly to a V_CNDMASK_. So prevent the fneg from being pulled
4791-
// out in this case. For now I've made the logic as specific to the case as
4792-
// possible, hopefully this can be relaxed in future.
4793-
if (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG) {
4794-
SDValue LHSB = LHS.getOperand(0);
4795-
SDValue RHSB = RHS.getOperand(0);
4796-
if (LHSB.getOpcode() == ISD::BITCAST &&
4797-
RHSB->getOpcode() == ISD::BITCAST) {
4798-
EVT LHSBOpTy = LHSB->getOperand(0).getValueType();
4799-
EVT RHSBOpTy = RHSB->getOperand(0).getValueType();
4800-
if (LHSB.getValueType() == MVT::f32 &&
4801-
RHSB.getValueType() == MVT::f32 && LHSBOpTy == MVT::i32 &&
4802-
RHSBOpTy == MVT::i32)
4803-
return SDValue();
4804-
}
4805-
}
4806-
48074804
return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS,
48084805
RHS);
48094806
}

llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1645,12 +1645,12 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16451645
; GFX7-NEXT: v_mov_b32_e32 v0, s3
16461646
; GFX7-NEXT: v_mov_b32_e32 v1, s1
16471647
; GFX7-NEXT: s_cselect_b32 s1, s1, s3
1648-
; GFX7-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1648+
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
16491649
; GFX7-NEXT: s_cselect_b32 s0, s0, s2
16501650
; GFX7-NEXT: v_mov_b32_e32 v1, s1
16511651
; GFX7-NEXT: v_mov_b32_e32 v2, s4
16521652
; GFX7-NEXT: s_mov_b32 flat_scratch_lo, s13
1653-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1653+
; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
16541654
; GFX7-NEXT: v_mov_b32_e32 v0, s0
16551655
; GFX7-NEXT: v_mov_b32_e32 v3, s5
16561656
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
@@ -1669,10 +1669,10 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16691669
; GFX9-NEXT: v_mov_b32_e32 v0, s3
16701670
; GFX9-NEXT: v_mov_b32_e32 v1, s1
16711671
; GFX9-NEXT: s_cselect_b32 s1, s1, s3
1672-
; GFX9-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1672+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
16731673
; GFX9-NEXT: s_cselect_b32 s0, s0, s2
16741674
; GFX9-NEXT: v_mov_b32_e32 v1, s1
1675-
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1675+
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
16761676
; GFX9-NEXT: v_mov_b32_e32 v0, s0
16771677
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
16781678
; GFX9-NEXT: s_endpgm
@@ -1683,17 +1683,17 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16831683
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
16841684
; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x10
16851685
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x18
1686+
; GFX11-NEXT: v_mov_b32_e32 v2, 0
16861687
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
16871688
; GFX11-NEXT: v_mov_b32_e32 v0, s1
16881689
; GFX11-NEXT: s_bitcmp1_b32 s6, 0
16891690
; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1690-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1691-
; GFX11-NEXT: v_cndmask_b32_e64 v0, -s3, -v0, vcc_lo
1691+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1692+
; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
16921693
; GFX11-NEXT: s_and_b32 s6, vcc_lo, exec_lo
16931694
; GFX11-NEXT: s_cselect_b32 s1, s1, s3
16941695
; GFX11-NEXT: s_cselect_b32 s0, s0, s2
1695-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1696-
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, s1, v0
1696+
; GFX11-NEXT: v_cndmask_b32_e64 v1, s1, -v0, vcc_lo
16971697
; GFX11-NEXT: v_mov_b32_e32 v0, s0
16981698
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
16991699
; GFX11-NEXT: s_endpgm

0 commit comments

Comments
 (0)