Skip to content

Commit 7e71a03

Browse files
committed
[AMDGPU] Split FeatureAtomicFaddInsts
FeatureAtomicFaddInsts is replaced with three more granular features. Contributors: Petar Avramovic <[email protected]>. Patch 3/N for upstreaming of the AMDGPU gfx11 architecture. Depends on D124537. Reviewed By: foad, #amdgpu, arsenm. Differential Revision: https://reviews.llvm.org/D124538
1 parent aff52d1 commit 7e71a03

File tree

7 files changed

+71
-31
lines changed

7 files changed

+71
-31
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -566,11 +566,28 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
566566
"Has v_pk_fmac_f16 instruction"
567567
>;
568568

569-
def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts",
570-
"HasAtomicFaddInsts",
569+
def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts",
570+
"HasAtomicFaddRtnInsts",
571571
"true",
572-
"Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, "
573-
"global_atomic_pk_add_f16 instructions",
572+
"Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
573+
"return original value",
574+
[FeatureFlatGlobalInsts]
575+
>;
576+
577+
def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts",
578+
"HasAtomicFaddNoRtnInsts",
579+
"true",
580+
"Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that "
581+
"don't return original value",
582+
[FeatureFlatGlobalInsts]
583+
>;
584+
585+
def FeatureAtomicPkFaddNoRtnInsts
586+
: SubtargetFeature<"atomic-pk-fadd-no-rtn-insts",
587+
"HasAtomicPkFaddNoRtnInsts",
588+
"true",
589+
"Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
590+
"don't return original value",
574591
[FeatureFlatGlobalInsts]
575592
>;
576593

@@ -988,7 +1005,8 @@ def FeatureISAVersion9_0_8 : FeatureSet<
9881005
FeatureDot7Insts,
9891006
FeatureMAIInsts,
9901007
FeaturePkFmacF16Inst,
991-
FeatureAtomicFaddInsts,
1008+
FeatureAtomicFaddNoRtnInsts,
1009+
FeatureAtomicPkFaddNoRtnInsts,
9921010
FeatureSupportsSRAMECC,
9931011
FeatureMFMAInlineLiteralBug,
9941012
FeatureImageGather4D16Bug]>;
@@ -1020,7 +1038,9 @@ def FeatureISAVersion9_0_A : FeatureSet<
10201038
FeaturePackedFP32Ops,
10211039
FeatureMAIInsts,
10221040
FeaturePkFmacF16Inst,
1023-
FeatureAtomicFaddInsts,
1041+
FeatureAtomicFaddRtnInsts,
1042+
FeatureAtomicFaddNoRtnInsts,
1043+
FeatureAtomicPkFaddNoRtnInsts,
10241044
FeatureImageInsts,
10251045
FeatureMadMacF32Insts,
10261046
FeatureSupportsSRAMECC,
@@ -1055,7 +1075,9 @@ def FeatureISAVersion9_4_0 : FeatureSet<
10551075
FeaturePackedFP32Ops,
10561076
FeatureMAIInsts,
10571077
FeaturePkFmacF16Inst,
1058-
FeatureAtomicFaddInsts,
1078+
FeatureAtomicFaddRtnInsts,
1079+
FeatureAtomicFaddNoRtnInsts,
1080+
FeatureAtomicPkFaddNoRtnInsts,
10591081
FeatureSupportsSRAMECC,
10601082
FeaturePackedTID,
10611083
FeatureArchitectedFlatScratch,
@@ -1544,8 +1566,13 @@ def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
15441566
def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">,
15451567
AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
15461568

1547-
def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">,
1548-
AssemblerPredicate<(all_of FeatureAtomicFaddInsts)>;
1569+
def HasAtomicFaddRtnInsts : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">,
1570+
AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>;
1571+
def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">,
1572+
AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>;
1573+
def HasAtomicPkFaddNoRtnInsts
1574+
: Predicate<"Subtarget->hasAtomicPkFaddNoRtnInsts()">,
1575+
AssemblerPredicate<(all_of FeatureAtomicPkFaddNoRtnInsts)>;
15491576

15501577
def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">,
15511578
AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,23 +1119,25 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores <
11191119
def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
11201120
int_amdgcn_buffer_wbinvl1>;
11211121

1122-
let SubtargetPredicate = HasAtomicFaddInsts in {
1123-
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
1122+
let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
1123+
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN<
11241124
"buffer_atomic_add_f32", VGPR_32, f32
11251125
>;
1126+
1127+
let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in
11261128
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
11271129
"buffer_atomic_pk_add_f16", VGPR_32, v2f16
11281130
>;
11291131

1130-
let OtherPredicates = [isGFX90APlus] in {
1131-
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN <
1132+
let OtherPredicates = [HasAtomicFaddRtnInsts] in
1133+
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN<
11321134
"buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_32
11331135
>;
1136+
1137+
let OtherPredicates = [isGFX90APlus] in
11341138
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN <
11351139
"buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_32
11361140
>;
1137-
}
1138-
} // End SubtargetPredicate = HasAtomicFaddInsts
11391141

11401142
//===----------------------------------------------------------------------===//
11411143
// MTBUF Instructions
@@ -1597,10 +1599,14 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
15971599
>;
15981600
}
15991601

1600-
let SubtargetPredicate = HasAtomicFaddInsts in {
1602+
let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
16011603
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">;
1604+
1605+
let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in
16021606
defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">;
1603-
}
1607+
1608+
let SubtargetPredicate = HasAtomicFaddRtnInsts in
1609+
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32">;
16041610

16051611
let SubtargetPredicate = isGFX90APlus in {
16061612
defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
@@ -2634,12 +2640,12 @@ def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>;
26342640
def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
26352641
} // End AssemblerPredicate = isGFX8GFX9
26362642

2637-
let SubtargetPredicate = HasAtomicFaddInsts in {
2643+
let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
26382644

26392645
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>;
26402646
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;
26412647

2642-
} // End SubtargetPredicate = HasAtomicFaddInsts
2648+
} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts
26432649

26442650
let SubtargetPredicate = isGFX90APlus in {
26452651
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -901,23 +901,22 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
901901
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
902902

903903
let is_flat_global = 1 in {
904-
let OtherPredicates = [HasAtomicFaddInsts] in {
904+
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
905905
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
906906
"global_atomic_add_f32", VGPR_32, f32
907907
>;
908+
let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
908909
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
909910
"global_atomic_pk_add_f16", VGPR_32, v2f16
910911
>;
911-
} // End OtherPredicates = [HasAtomicFaddInsts]
912-
913-
let OtherPredicates = [isGFX90APlus] in {
912+
let OtherPredicates = [HasAtomicFaddRtnInsts] in
914913
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
915914
"global_atomic_add_f32", VGPR_32, f32
916915
>;
916+
let OtherPredicates = [isGFX90APlus] in
917917
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
918918
"global_atomic_pk_add_f16", VGPR_32, v2f16
919919
>;
920-
} // End OtherPredicates = [isGFX90APlus]
921920
} // End is_flat_global = 1
922921

923922
//===----------------------------------------------------------------------===//
@@ -1445,10 +1444,10 @@ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_ato
14451444
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
14461445
}
14471446

1448-
let OtherPredicates = [HasAtomicFaddInsts] in {
1447+
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
14491448
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>;
1449+
let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
14501450
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
1451-
}
14521451

14531452
let OtherPredicates = [isGFX90APlus] in {
14541453
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
143143
bool HasDot7Insts = false;
144144
bool HasMAIInsts = false;
145145
bool HasPkFmacF16Inst = false;
146-
bool HasAtomicFaddInsts = false;
146+
bool HasAtomicFaddRtnInsts = false;
147+
bool HasAtomicFaddNoRtnInsts = false;
148+
bool HasAtomicPkFaddNoRtnInsts = false;
147149
bool SupportsSRAMECC = false;
148150

149151
// This should not be used directly. 'TargetID' tracks the dynamic settings
@@ -709,9 +711,15 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
709711
}
710712

711713
bool hasAtomicFaddInsts() const {
712-
return HasAtomicFaddInsts;
714+
return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
713715
}
714716

717+
bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; }
718+
719+
bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
720+
721+
bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
722+
715723
bool hasNoSdstCMPX() const {
716724
return HasNoSdstCMPX;
717725
}

llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,4 @@ define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(float ad
2323
ret void
2424
}
2525

26-
attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+atomic-fadd-insts" "amdgpu-unsafe-fp-atomics"="true" }
26+
attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" }

llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -800,5 +800,5 @@ define amdgpu_kernel void @infer_as_before_atomic(float* addrspace(4)* %arg) #0
800800
}
801801

802802
attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
803-
attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" "amdgpu-unsafe-fp-atomics"="true" }
803+
attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" }
804804
attributes #2 = { "amdgpu-unsafe-fp-atomics"="true" }

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,4 @@ define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspac
9999
ret void
100100
}
101101

102-
attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }
102+
attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"}

0 commit comments

Comments
 (0)