[DAGISel] Fix MMRA Handling in copyExtraInfo #124730
Conversation
llvm#78569 did not implement this correctly: I didn't understand the function well enough, and an edge case came up that breaks it.
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-amdgpu
Author: Pierre van Houtryve (Pierre-vh)
Changes: #78569 did not implement this correctly, and an edge case breaks it by triggering "Assertion `!Leafs.empty()' failed."
Fixes SWDEV-507698
Full diff: https://github.com/llvm/llvm-project/pull/124730.diff
2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0f9790a10a1397..b416c0efbbc4fc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13635,7 +13635,7 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
// Use of operator[] on the DenseMap may cause an insertion, which invalidates
// the iterator, hence the need to make a copy to prevent a use-after-free.
NodeExtraInfo NEI = I->second;
- if (LLVM_LIKELY(!NEI.PCSections) && LLVM_LIKELY(!NEI.MMRA)) {
+ if (LLVM_LIKELY(!NEI.PCSections)) {
// No deep copy required for the types of extra info set.
//
// FIXME: Investigate if other types of extra info also need deep copy. This
diff --git a/llvm/test/CodeGen/AMDGPU/mmra.ll b/llvm/test/CodeGen/AMDGPU/mmra.ll
index 39650f4295c76e..d0696bf329af8d 100644
--- a/llvm/test/CodeGen/AMDGPU/mmra.ll
+++ b/llvm/test/CodeGen/AMDGPU/mmra.ll
@@ -17,7 +17,7 @@ define void @fence_loads(ptr %ptr) {
; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1
; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4)
- ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, mmra !2
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2
; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4)
; CHECK-NEXT: SI_RETURN
@@ -82,7 +82,7 @@ define void @atomicrmw_rel(ptr %ptr) {
; CHECK-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI1]], killed [[V_OR_B32_e64_]], implicit $exec
; CHECK-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1, mmra !2
+ ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !2
; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !2 :: (load store release monotonic (s32) on %ir.AlignedAddr)
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !2
@@ -140,7 +140,7 @@ define void @cmpxchg(ptr %ptr) {
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[PHI2]], [[V_LSHLREV_B32_e64_2]], implicit $exec
; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_OR_B32_e64_]], %subreg.sub0, [[PHI2]], %subreg.sub1, mmra !1
+ ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_OR_B32_e64_]], %subreg.sub0, [[PHI2]], %subreg.sub1
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !1
; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !1 :: (load store acquire acquire (s32) on %ir.AlignedAddr)
; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI2]], implicit $exec
@@ -180,6 +180,105 @@ define void @cmpxchg(ptr %ptr) {
ret void
}
+declare <32 x half> @f()
+
+; Variant of atomicrmw_rel that provoked a crash in SelectionDAG::copyExtraInfo
+define void @atomicrmw_rel_deepcopy(ptr %ptr) {
+ ; CHECK-LABEL: name: atomicrmw_rel_deepcopy
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr31
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @f, target-flags(amdgpu-gotprel32-hi) @f, implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]]
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]]
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]]
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY7]]
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY6]]
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY5]]
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY4]]
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY3]]
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY]]
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]]
+ ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @f, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY $vgpr6
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY $vgpr7
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY $vgpr8
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY $vgpr9
+ ; CHECK-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY $vgpr10
+ ; CHECK-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY $vgpr11
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY $vgpr12
+ ; CHECK-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY $vgpr13
+ ; CHECK-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY $vgpr14
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY $vgpr15
+ ; CHECK-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4
+ ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY29]], killed [[S_MOV_B32_]], implicit $exec
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY11]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY30:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 3
+ ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY29]], [[S_MOV_B32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[S_MOV_B32_1]], killed [[V_AND_B32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 255
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_2]], implicit $exec
+ ; CHECK-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[V_LSHLREV_B32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[COPY31:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !0
+ ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY31]], 0, 0, implicit $exec, implicit $flat_scr, mmra !0 :: (load (s32) from %ir.AlignedAddr)
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.atomicrmw.start:
+ ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %7, %bb.1
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[FLAT_LOAD_DWORD]], %bb.0, %6, %bb.1
+ ; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_NOT_B32_e32_]], [[V_LSHLREV_B32_e64_1]], implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI1]], killed [[V_OR_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY32:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !0
+ ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY30]], killed [[COPY32]], 0, 1, implicit $exec, implicit $flat_scr, mmra !0 :: (load store release monotonic (s32) on %ir.AlignedAddr)
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !0
+ ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_EQ_U32_e64_]], [[PHI]], implicit-def dead $scc
+ ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.atomicrmw.end:
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[SI_IF_BREAK]], %bb.1
+ ; CHECK-NEXT: SI_END_CF [[PHI2]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: SI_RETURN
+ %C = call <32 x half> @f()
+ %old.2 = atomicrmw add ptr %ptr, i8 0 release, align 1, !mmra !0
+ ret void
+}
+
attributes #0 = { memory(read) }
attributes #1 = { memory(write) }
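
For context, here is the shape of the fast path after this change, as a simplified sketch rather than the exact LLVM source: only PCSections still forces the deep-copy path, MMRAs now take the cheap shallow copy, and the extra info is copied out of the DenseMap before any insertion because operator[] can invalidate the iterator.

// Simplified sketch of SelectionDAG::copyExtraInfo after this patch; the
// deep-copy slow path and the surrounding class are elided.
void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
  auto I = SDEI.find(From);
  if (I == SDEI.end())
    return;

  // Use of operator[] on the DenseMap may cause an insertion, which
  // invalidates the iterator, hence copy the value out first.
  NodeExtraInfo NEI = I->second;
  if (LLVM_LIKELY(!NEI.PCSections)) {
    // Fast path: a shallow copy suffices. After this patch MMRAs also take
    // it; they only need to survive on the replacement node itself, not on
    // every node feeding it.
    SDEI[To] = std::move(NEI);
    return;
  }

  // Slow path (elided): walk the replacement DAG so the relevant leaf nodes
  // inherit the PCSections annotation; this is where the
  // assert(!Leafs.empty()) mentioned in the description lives.
}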
Can you elaborate on what the edge case is? (I wish some of this info would be preserved in the commit logs, because it's easily lost in PR comments and such.) I'm unaware of …
Based on this patch, it looks like …
The presence of the large call at the start of the function appears to affect DAG building in a way that creates a load whose chain goes to the EntryNode, and that tripped this function (the lambda returned false, and it then hit the assert in the loop below).
The tests look better after the change, with MMRAs removed from non-memory instructions like MOV. For MMRAs, what matters is that the annotation stays on the memory instruction: if a load gets replaced by something else, the MMRA needs to stay on the load, but not on, e.g., the pointer arithmetic of its operand.
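
A minimal illustration of that invariant, using a hypothetical helper (only MachineInstr::mayLoadOrStore() is real LLVM API):

#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical predicate capturing the rule described above: an MMRA only
// carries meaning on instructions that actually touch memory, so it is
// expected (and after this patch, intended) that it disappears from plain
// value computations such as the V_MOV_B32 in the updated test.
static bool mmraIsMeaningfulOn(const llvm::MachineInstr &MI) {
  // mayLoadOrStore() covers loads, stores, and atomic operations.
  return MI.mayLoadOrStore();
}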