Skip to content

Commit 2b83cee

Browse files
mariusz-sikora-at-amd and Mateja Marjanovic authored
[AMDGPU][GFX12] Default component broadcast store (#76212)
For image and buffer stores, the default behaviour on GFX12 is to set all unset components to the value of the first component. So if only the X component is passed, the result is the same as storing XXXX, and passing XY is the same as storing XYXX. This patch simplifies the vector of components passed to the store in InstCombine by removing trailing components that are equal to the first component. For image stores it also trims the DMask if necessary. --------- Co-authored-by: Mateja Marjanovic <[email protected]>
1 parent 528cd28 commit 2b83cee

File tree

4 files changed

+155
-26
lines changed

4 files changed

+155
-26
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,18 @@ def FeatureFlatAtomicFaddF32Inst
719719
"Has flat_atomic_add_f32 instruction"
720720
>;
721721

722+
// Subtarget feature "default-component-zero": sets the HasDefaultComponentZero
// field to "true". Marks targets whose BUFFER/IMAGE store instructions write
// zero to any component the instruction does not specify.
def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero",
723+
"HasDefaultComponentZero",
724+
"true",
725+
"BUFFER/IMAGE store instructions set unspecified components to zero"
726+
>;
727+
728+
// Subtarget feature "default-component-broadcast": sets the
// HasDefaultComponentBroadcast field to "true". Marks targets whose
// BUFFER/IMAGE store instructions fill any unspecified component with the
// value of the x (first) component.
def FeatureDefaultComponentBroadcast : SubtargetFeature<"default-component-broadcast",
729+
"HasDefaultComponentBroadcast",
730+
"true",
731+
"BUFFER/IMAGE store instructions set unspecified components to x component"
732+
>;
733+
722734
def FeatureSupportsSRAMECC : SubtargetFeature<"sramecc-support",
723735
"SupportsSRAMECC",
724736
"true",
@@ -1003,7 +1015,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
10031015
FeatureWavefrontSize64, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10041016
FeatureDsSrc2Insts, FeatureLDSBankCount32, FeatureMovrel,
10051017
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
1006-
FeatureGDS, FeatureGWS
1018+
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
10071019
]
10081020
>;
10091021

@@ -1014,7 +1026,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
10141026
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
10151027
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10161028
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureUnalignedBufferAccess,
1017-
FeatureImageInsts, FeatureGDS, FeatureGWS
1029+
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
10181030
]
10191031
>;
10201032

@@ -1029,7 +1041,8 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
10291041
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
10301042
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
10311043
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
1032-
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS
1044+
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
1045+
FeatureDefaultComponentZero
10331046
]
10341047
>;
10351048

@@ -1047,7 +1060,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
10471060
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
10481061
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
10491062
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
1050-
FeatureNegativeScratchOffsetBug, FeatureGWS
1063+
FeatureNegativeScratchOffsetBug, FeatureGWS, FeatureDefaultComponentZero
10511064
]
10521065
>;
10531066

@@ -1067,7 +1080,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
10671080
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
10681081
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
10691082
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureImageInsts,
1070-
FeatureGDS, FeatureGWS
1083+
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero
10711084
]
10721085
>;
10731086

@@ -1087,7 +1100,7 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
10871100
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
10881101
FeatureA16, FeatureFastDenormalF32, FeatureG16,
10891102
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess, FeatureGDS,
1090-
FeatureGWS
1103+
FeatureGWS, FeatureDefaultComponentZero
10911104
]
10921105
>;
10931106

@@ -1107,7 +1120,7 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
11071120
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
11081121
FeatureA16, FeatureFastDenormalF32, FeatureG16,
11091122
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
1110-
FeatureTrue16BitInsts
1123+
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast
11111124
]
11121125
>;
11131126

@@ -2013,6 +2026,13 @@ def HasFlatAtomicFaddF32Inst
20132026
: Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">,
20142027
AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>;
20152028

2029+
// Predicate for FeatureDefaultComponentZero: the C++ predicate string queries
// Subtarget->hasDefaultComponentZero(), and the assembler predicate requires
// the feature to be enabled.
def HasDefaultComponentZero
2030+
: Predicate<"Subtarget->hasDefaultComponentZero()">,
2031+
AssemblerPredicate<(all_of FeatureDefaultComponentZero)>;
2032+
// Predicate for FeatureDefaultComponentBroadcast: the C++ predicate string
// queries Subtarget->hasDefaultComponentBroadcast(), and the assembler
// predicate requires the feature to be enabled.
def HasDefaultComponentBroadcast
2033+
: Predicate<"Subtarget->hasDefaultComponentBroadcast()">,
2034+
AssemblerPredicate<(all_of FeatureDefaultComponentBroadcast)>;
2035+
20162036
// NOTE(review): the C++ predicate string is negated ("!Subtarget->...") while
// the assembler predicate requires FeatureDsSrc2Insts to be set, so the two
// conditions appear to disagree -- confirm whether the negation is intentional.
def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">,
20172037
AssemblerPredicate<(all_of FeatureDsSrc2Insts)>;
20182038

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,35 @@ static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
402402
return DemandedElts;
403403
}
404404

405+
// Compute the demanded-elements mask for the vector \p V by trimming elements
// from the end of the vector that are equal to its first element (or are
// undef / poison mask lanes). Bit 0 is never cleared, and the scan stops at
406+
// the first trailing element that cannot be shown to match the first one.
// \p V must have a fixed vector type (enforced by the cast below).
407+
static APInt defaultComponentBroadcast(Value *V) {
408+
auto *VTy = cast<FixedVectorType>(V->getType());
409+
unsigned VWidth = VTy->getNumElements();
410+
// Start with every element demanded; bits are cleared as elements are trimmed.
APInt DemandedElts = APInt::getAllOnes(VWidth);
411+
// May be null when the first element cannot be determined.
Value *FirstComponent = findScalarElement(V, 0);
412+
413+
// When V is itself a shufflevector, take its mask so that lanes can be
// compared by mask index in the loop below; otherwise the mask stays empty.
SmallVector<int> ShuffleMask;
414+
if (auto *SVI = dyn_cast<ShuffleVectorInst>(V))
415+
SVI->getShuffleMask(ShuffleMask);
416+
417+
// Walk from the last element down to element 1 (element 0 is always kept).
for (int I = VWidth - 1; I > 0; --I) {
418+
if (ShuffleMask.empty()) {
419+
auto *Elt = findScalarElement(V, I);
420+
// Stop if the element is unknown, or is neither the same Value as the
// first component nor undef.
if (!Elt || (Elt != FirstComponent && !isa<UndefValue>(Elt)))
421+
break;
422+
} else {
423+
// Detect identical elements in the shufflevector result, even though
424+
// findScalarElement cannot tell us what that element is.
425+
if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
426+
break;
427+
}
428+
// Element I matches the first component (or is undef), so it is not demanded.
DemandedElts.clearBit(I);
429+
}
430+
431+
return DemandedElts;
432+
}
433+
405434
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
406435
IntrinsicInst &II,
407436
APInt DemandedElts,
@@ -1140,8 +1169,13 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
11401169
if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
11411170
break;
11421171

1143-
APInt DemandedElts =
1144-
trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
1172+
APInt DemandedElts;
1173+
if (ST->hasDefaultComponentBroadcast())
1174+
DemandedElts = defaultComponentBroadcast(II.getArgOperand(0));
1175+
else if (ST->hasDefaultComponentZero())
1176+
DemandedElts = trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
1177+
else
1178+
break;
11451179

11461180
int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
11471181
if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
165165
bool HasAtomicCSubNoRtnInsts = false;
166166
bool HasAtomicGlobalPkAddBF16Inst = false;
167167
bool HasFlatAtomicFaddF32Inst = false;
168+
bool HasDefaultComponentZero = false;
169+
bool HasDefaultComponentBroadcast = false;
168170
bool SupportsSRAMECC = false;
169171

170172
// This should not be used directly. 'TargetID' tracks the dynamic settings
@@ -802,6 +804,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
802804

803805
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
804806

807+
// Returns the HasDefaultComponentZero flag, which defaults to false and is the
// field named by the "default-component-zero" subtarget feature (BUFFER/IMAGE
// stores set unspecified components to zero).
bool hasDefaultComponentZero() const { return HasDefaultComponentZero; }
808+
809+
// Returns the HasDefaultComponentBroadcast flag, which defaults to false and
// is the field named by the "default-component-broadcast" subtarget feature
// (BUFFER/IMAGE stores set unspecified components to the x component).
bool hasDefaultComponentBroadcast() const {
810+
return HasDefaultComponentBroadcast;
811+
}
812+
805813
// Returns the HasNoSdstCMPX subtarget flag.
bool hasNoSdstCMPX() const {
806814
return HasNoSdstCMPX;
807815
}

0 commit comments

Comments
 (0)