@@ -16075,56 +16075,50 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
       return AtomicExpansionKind::CmpXChg;
     }

-    if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
+    if (!AMDGPU::isFlatGlobalAddrSpace(AS) &&
+        AS != AMDGPUAS::BUFFER_FAT_POINTER)
       return AtomicExpansionKind::CmpXChg;

-    if ((AMDGPU::isFlatGlobalAddrSpace(AS) ||
-         AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
-        Subtarget->hasAtomicFaddNoRtnInsts()) {
-      if (Subtarget->hasGFX940Insts())
-        return AtomicExpansionKind::None;
+    // TODO: gfx940 supports v2f16 and v2bf16
+    if (Subtarget->hasGFX940Insts() && (Ty->isFloatTy() || Ty->isDoubleTy()))
+      return AtomicExpansionKind::None;

-      if (unsafeFPAtomicsDisabled(RMW->getFunction()))
-        return AtomicExpansionKind::CmpXChg;
+    if (unsafeFPAtomicsDisabled(RMW->getFunction()))
+      return AtomicExpansionKind::CmpXChg;

-      // Always expand system scope fp atomics.
-      if (HasSystemScope)
-        return AtomicExpansionKind::CmpXChg;
+    // Always expand system scope fp atomics.
+    if (HasSystemScope)
+      return AtomicExpansionKind::CmpXChg;

-      if ((AMDGPU::isExtendedGlobalAddrSpace(AS) ||
-           AS == AMDGPUAS::BUFFER_FAT_POINTER) &&
-          Ty->isFloatTy()) {
-        // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
-        if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
-          return ReportUnsafeHWInst(AtomicExpansionKind::None);
-        // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
-        if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
-          return ReportUnsafeHWInst(AtomicExpansionKind::None);
-      }
+    // global and flat atomic fadd f64: gfx90a, gfx940.
+    if (Subtarget->hasGFX90AInsts() && Ty->isDoubleTy())
+      return ReportUnsafeHWInst(AtomicExpansionKind::None);

-      // flat atomic fadd f32: gfx940, gfx11+.
-      if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
-          Subtarget->hasFlatAtomicFaddF32Inst())
+    if (AS != AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
+      // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940, gfx11+.
+      if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
+        return ReportUnsafeHWInst(AtomicExpansionKind::None);
+      // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
+      if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
         return ReportUnsafeHWInst(AtomicExpansionKind::None);
+    }

-      // global and flat atomic fadd f64: gfx90a, gfx940.
-      if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts())
+    // flat atomic fadd f32: gfx940, gfx11+.
+    if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
+      if (Subtarget->hasFlatAtomicFaddF32Inst())
         return ReportUnsafeHWInst(AtomicExpansionKind::None);

       // If it is in flat address space, and the type is float, we will try to
       // expand it, if the target supports global and lds atomic fadd. The
       // reason we need that is, in the expansion, we emit the check of address
       // space. If it is in global address space, we emit the global atomic
       // fadd; if it is in shared address space, we emit the LDS atomic fadd.
-      if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() &&
-          Subtarget->hasLDSFPAtomicAddF32()) {
+      if (Subtarget->hasLDSFPAtomicAddF32()) {
         if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
           return AtomicExpansionKind::Expand;
         if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
           return AtomicExpansionKind::Expand;
       }
-
-      return AtomicExpansionKind::CmpXChg;
     }

     return AtomicExpansionKind::CmpXChg;
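For context, the comment retained above the hasLDSFPAtomicAddF32() check describes what the Expand path does for an atomicrmw fadd on a flat f32 pointer: the expansion emits a runtime address-space test and dispatches to either the global or the LDS atomic fadd. A minimal conceptual sketch of that expanded IR is below; the function name, value names, and the monotonic ordering are illustrative, and this is not the exact control flow AtomicExpandPass emits (the real expansion also has to consider private/scratch pointers, omitted here):

declare i1 @llvm.amdgcn.is.shared(ptr nocapture)

define float @flat_fadd_expanded(ptr %p, float %v) {
entry:
  ; Branch on whether the flat pointer actually aliases LDS.
  %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p)
  br i1 %is.shared, label %lds, label %global

lds:
  ; Shared address space: use the LDS atomic fadd.
  %p.lds = addrspacecast ptr %p to ptr addrspace(3)
  %old.lds = atomicrmw fadd ptr addrspace(3) %p.lds, float %v monotonic
  br label %done

global:
  ; Global address space: use the global atomic fadd.
  %p.global = addrspacecast ptr %p to ptr addrspace(1)
  %old.global = atomicrmw fadd ptr addrspace(1) %p.global, float %v monotonic
  br label %done

done:
  %old = phi float [ %old.lds, %lds ], [ %old.global, %global ]
  ret float %old
}

This also shows why the rewritten code gates Expand on hasLDSFPAtomicAddF32() plus the matching no-rtn/rtn global feature: both sides of the runtime branch need a hardware atomic to land on.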