@@ -16075,56 +16075,49 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
       return AtomicExpansionKind::CmpXChg;
     }

-    if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
+    if (!AMDGPU::isFlatGlobalAddrSpace(AS) && AS != AMDGPUAS::BUFFER_FAT_POINTER)
       return AtomicExpansionKind::CmpXChg;

-    if ((AMDGPU::isFlatGlobalAddrSpace(AS) ||
-         AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
-        Subtarget->hasAtomicFaddNoRtnInsts()) {
-      if (Subtarget->hasGFX940Insts())
-        return AtomicExpansionKind::None;
+    // TODO: gfx940 supports v2f16 and v2bf16
+    if (Subtarget->hasGFX940Insts() && (Ty->isFloatTy() || Ty->isDoubleTy()))
+      return AtomicExpansionKind::None;

-      if (unsafeFPAtomicsDisabled(RMW->getFunction()))
-        return AtomicExpansionKind::CmpXChg;
+    if (unsafeFPAtomicsDisabled(RMW->getFunction()))
+      return AtomicExpansionKind::CmpXChg;

-      // Always expand system scope fp atomics.
-      if (HasSystemScope)
-        return AtomicExpansionKind::CmpXChg;
+    // Always expand system scope fp atomics.
+    if (HasSystemScope)
+      return AtomicExpansionKind::CmpXChg;

-      if ((AMDGPU::isExtendedGlobalAddrSpace(AS) ||
-           AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
-          Ty->isFloatTy()) {
-        // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
-        if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
-          return ReportUnsafeHWInst(AtomicExpansionKind::None);
-        // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
-        if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
-          return ReportUnsafeHWInst(AtomicExpansionKind::None);
-      }
+    // global and flat atomic fadd f64: gfx90a, gfx940.
+    if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+      return ReportUnsafeHWInst(AtomicExpansionKind::None);

-      // flat atomic fadd f32: gfx940, gfx11+.
-      if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
-          Subtarget->hasFlatAtomicFaddF32Inst())
+    if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
+      // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
+      if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
+        return ReportUnsafeHWInst(AtomicExpansionKind::None);
+      // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
+      if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
         return ReportUnsafeHWInst(AtomicExpansionKind::None);
+    }

-      // global and flat atomic fadd f64: gfx90a, gfx940.
-      if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts())
+    // flat atomic fadd f32: gfx940, gfx11+.
+    if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
+      if (Subtarget->hasFlatAtomicFaddF32Inst())
         return ReportUnsafeHWInst(AtomicExpansionKind::None);

       // If it is in flat address space, and the type is float, we will try to
       // expand it, if the target supports global and lds atomic fadd. The
       // reason we need that is, in the expansion, we emit the check of address
       // space. If it is in global address space, we emit the global atomic
       // fadd; if it is in shared address space, we emit the LDS atomic fadd.
-      if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
-          Subtarget->hasLDSFPAtomicAddF32()) {
+      if (Subtarget->hasLDSFPAtomicAddF32()) {
         if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
           return AtomicExpansionKind::Expand;
         if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
           return AtomicExpansionKind::Expand;
       }
-
-      return AtomicExpansionKind::CmpXChg;
     }

     return AtomicExpansionKind::CmpXChg;
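For context, the sketch below (a hypothetical helper, not part of this change) uses IRBuilder to construct the kind of atomicrmw fadd the rewritten hook can leave alone as AtomicExpansionKind::None, i.e. lower to a native global atomic add: an f32 fadd on a global (addrspace(1)) pointer at agent scope, in a function that has opted in via "amdgpu-unsafe-fp-atomics"="true". The helper name and insertion point are illustrative only.

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Hypothetical helper, not from the patch: builds an atomicrmw fadd that
// the hook above can select as AtomicExpansionKind::None.
AtomicRMWInst *emitGlobalAtomicFAdd(Function &F, Value *Ptr, Value *Val) {
  LLVMContext &Ctx = F.getContext();
  IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin());

  // "agent" scope keeps HasSystemScope false, avoiding the unconditional
  // system-scope CmpXChg expansion.
  SyncScope::ID AgentSSID = Ctx.getOrInsertSyncScopeID("agent");

  // Without this attribute, unsafeFPAtomicsDisabled() forces CmpXChg.
  F.addFnAttr("amdgpu-unsafe-fp-atomics", "true");

  // Ptr is assumed to be an addrspace(1) pointer and Val an f32. An unused
  // result needs only hasAtomicFaddNoRtnInsts() (gfx908+); a used result
  // additionally needs hasAtomicFaddRtnInsts() (gfx90a+).
  return B.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(4),
                           AtomicOrdering::Monotonic, AgentSSID);
}

Drop the attribute, or widen the scope to the default (system), and the same atomicrmw instead takes one of the CmpXChg paths above.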
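As a rough analogy for the AtomicExpansionKind::CmpXChg fallback that most paths here return: AtomicExpandPass rewrites the atomicrmw into a compare-exchange loop over the value's integer bits. The C++ below mirrors the shape of that loop using std::atomic; it is illustrative only, not the IR the pass actually emits.

#include <atomic>
#include <bit>
#include <cstdint>

// CAS-loop analogue of the CmpXChg expansion for an f32 atomicrmw fadd:
// retry over the value's i32 bits until the compare-exchange succeeds.
float atomicFAddViaCAS(std::atomic<uint32_t> &Addr, float Val) {
  uint32_t Old = Addr.load(std::memory_order_relaxed);
  for (;;) {
    uint32_t New = std::bit_cast<uint32_t>(std::bit_cast<float>(Old) + Val);
    // On failure, compare_exchange_weak reloads Old and the loop retries.
    if (Addr.compare_exchange_weak(Old, New))
      return std::bit_cast<float>(Old); // atomicrmw yields the old value.
  }
}

The loop is correct at any synchronization scope, which is presumably why system-scope fp atomics are unconditionally expanded this way rather than mapped to the hardware instructions.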