@@ -223,8 +223,9 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
223
223
};
224
224
}
225
225
226
- static bool isRegisterSize (unsigned Size) {
227
- return Size % 32 == 0 && Size <= MaxRegisterSize;
226
+ static bool isRegisterSize (const GCNSubtarget &ST, unsigned Size) {
227
+ return ((ST.useRealTrue16Insts () && Size == 16 ) || Size % 32 == 0 ) &&
228
+ Size <= MaxRegisterSize;
228
229
}
229
230
230
231
static bool isRegisterVectorElementType (LLT EltTy) {
@@ -240,8 +241,8 @@ static bool isRegisterVectorType(LLT Ty) {
240
241
}
241
242
242
243
// TODO: replace all uses of isRegisterType with isRegisterClassType
243
- static bool isRegisterType (LLT Ty) {
244
- if (!isRegisterSize (Ty.getSizeInBits ()))
244
+ static bool isRegisterType (const GCNSubtarget &ST, LLT Ty) {
245
+ if (!isRegisterSize (ST, Ty.getSizeInBits ()))
245
246
return false ;
246
247
247
248
if (Ty.isVector ())
@@ -252,19 +253,21 @@ static bool isRegisterType(LLT Ty) {
252
253
253
254
// Any combination of 32 or 64-bit elements up to the maximum register size, and
254
255
// multiples of v2s16.
255
- static LegalityPredicate isRegisterType (unsigned TypeIdx) {
256
- return [=](const LegalityQuery &Query) {
257
- return isRegisterType (Query.Types [TypeIdx]);
256
+ static LegalityPredicate isRegisterType (const GCNSubtarget &ST,
257
+ unsigned TypeIdx) {
258
+ return [=, &ST](const LegalityQuery &Query) {
259
+ return isRegisterType (ST, Query.Types [TypeIdx]);
258
260
};
259
261
}
260
262
261
263
// RegisterType that doesn't have a corresponding RegClass.
262
264
// TODO: Once `isRegisterType` is replaced with `isRegisterClassType` this
263
265
// should be removed.
264
- static LegalityPredicate isIllegalRegisterType (unsigned TypeIdx) {
265
- return [=](const LegalityQuery &Query) {
266
+ static LegalityPredicate isIllegalRegisterType (const GCNSubtarget &ST,
267
+ unsigned TypeIdx) {
268
+ return [=, &ST](const LegalityQuery &Query) {
266
269
LLT Ty = Query.Types [TypeIdx];
267
- return isRegisterType (Ty) &&
270
+ return isRegisterType (ST, Ty) &&
268
271
!SIRegisterInfo::getSGPRClassForBitWidth (Ty.getSizeInBits ());
269
272
};
270
273
}
@@ -348,17 +351,20 @@ static std::initializer_list<LLT> AllS64Vectors = {V2S64, V3S64, V4S64, V5S64,
348
351
V6S64, V7S64, V8S64, V16S64};
349
352
350
353
// Checks whether a type is in the list of legal register types.
351
- static bool isRegisterClassType (LLT Ty) {
354
+ static bool isRegisterClassType (const GCNSubtarget &ST, LLT Ty) {
352
355
if (Ty.isPointerOrPointerVector ())
353
356
Ty = Ty.changeElementType (LLT::scalar (Ty.getScalarSizeInBits ()));
354
357
355
358
return is_contained (AllS32Vectors, Ty) || is_contained (AllS64Vectors, Ty) ||
356
- is_contained (AllScalarTypes, Ty) || is_contained (AllS16Vectors, Ty);
359
+ is_contained (AllScalarTypes, Ty) ||
360
+ (ST.useRealTrue16Insts () && Ty == S16) ||
361
+ is_contained (AllS16Vectors, Ty);
357
362
}
358
363
359
- static LegalityPredicate isRegisterClassType (unsigned TypeIdx) {
360
- return [TypeIdx](const LegalityQuery &Query) {
361
- return isRegisterClassType (Query.Types [TypeIdx]);
364
+ static LegalityPredicate isRegisterClassType (const GCNSubtarget &ST,
365
+ unsigned TypeIdx) {
366
+ return [&ST, TypeIdx](const LegalityQuery &Query) {
367
+ return isRegisterClassType (ST, Query.Types [TypeIdx]);
362
368
};
363
369
}
364
370
@@ -510,7 +516,7 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
510
516
511
517
static bool isLoadStoreLegal (const GCNSubtarget &ST, const LegalityQuery &Query) {
512
518
const LLT Ty = Query.Types [0 ];
513
- return isRegisterType (Ty) && isLoadStoreSizeLegal (ST, Query) &&
519
+ return isRegisterType (ST, Ty) && isLoadStoreSizeLegal (ST, Query) &&
514
520
!hasBufferRsrcWorkaround (Ty) && !loadStoreBitcastWorkaround (Ty);
515
521
}
516
522
@@ -523,12 +529,12 @@ static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
523
529
if (Size != MemSizeInBits)
524
530
return Size <= 32 && Ty.isVector ();
525
531
526
- if (loadStoreBitcastWorkaround (Ty) && isRegisterType (Ty))
532
+ if (loadStoreBitcastWorkaround (Ty) && isRegisterType (ST, Ty))
527
533
return true ;
528
534
529
535
// Don't try to handle bitcasting vector ext loads for now.
530
536
return Ty.isVector () && (!MemTy.isVector () || MemTy == Ty) &&
531
- (Size <= 32 || isRegisterSize (Size)) &&
537
+ (Size <= 32 || isRegisterSize (ST, Size)) &&
532
538
!isRegisterVectorElementType (Ty.getElementType ());
533
539
}
534
540
@@ -875,7 +881,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
875
881
876
882
getActionDefinitionsBuilder (G_BITCAST)
877
883
// Don't worry about the size constraint.
878
- .legalIf (all (isRegisterClassType (0 ), isRegisterClassType (1 )))
884
+ .legalIf (all (isRegisterClassType (ST, 0 ), isRegisterClassType (ST, 1 )))
879
885
.lower ();
880
886
881
887
getActionDefinitionsBuilder (G_CONSTANT)
@@ -890,7 +896,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
890
896
.clampScalar (0 , S16, S64);
891
897
892
898
getActionDefinitionsBuilder ({G_IMPLICIT_DEF, G_FREEZE})
893
- .legalIf (isRegisterClassType (0 ))
899
+ .legalIf (isRegisterClassType (ST, 0 ))
894
900
// s1 and s16 are special cases because they have legal operations on
895
901
// them, but don't really occupy registers in the normal way.
896
902
.legalFor ({S1, S16})
@@ -1779,7 +1785,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
1779
1785
unsigned IdxTypeIdx = 2 ;
1780
1786
1781
1787
getActionDefinitionsBuilder (Op)
1782
- .customIf ([=](const LegalityQuery &Query) {
1788
+ .customIf ([=](const LegalityQuery &Query) {
1783
1789
const LLT EltTy = Query.Types [EltTypeIdx];
1784
1790
const LLT VecTy = Query.Types [VecTypeIdx];
1785
1791
const LLT IdxTy = Query.Types [IdxTypeIdx];
@@ -1800,36 +1806,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
1800
1806
IdxTy.getSizeInBits () == 32 &&
1801
1807
isLegalVecType;
1802
1808
})
1803
- .bitcastIf (all (sizeIsMultipleOf32 (VecTypeIdx), scalarOrEltNarrowerThan (VecTypeIdx, 32 )),
1804
- bitcastToVectorElement32 (VecTypeIdx))
1805
- // .bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
1806
- .bitcastIf (
1807
- all (sizeIsMultipleOf32 (VecTypeIdx), scalarOrEltWiderThan (VecTypeIdx, 64 )),
1808
- [=](const LegalityQuery &Query) {
1809
- // For > 64-bit element types, try to turn this into a 64-bit
1810
- // element vector since we may be able to do better indexing
1811
- // if this is scalar. If not, fall back to 32.
1812
- const LLT EltTy = Query.Types [EltTypeIdx];
1813
- const LLT VecTy = Query.Types [VecTypeIdx];
1814
- const unsigned DstEltSize = EltTy.getSizeInBits ();
1815
- const unsigned VecSize = VecTy.getSizeInBits ();
1816
-
1817
- const unsigned TargetEltSize = DstEltSize % 64 == 0 ? 64 : 32 ;
1818
- return std::pair (
1819
- VecTypeIdx,
1820
- LLT::fixed_vector (VecSize / TargetEltSize, TargetEltSize));
1821
- })
1822
- .clampScalar (EltTypeIdx, S32, S64)
1823
- .clampScalar (VecTypeIdx, S32, S64)
1824
- .clampScalar (IdxTypeIdx, S32, S32)
1825
- .clampMaxNumElements (VecTypeIdx, S32, 32 )
1826
- // TODO: Clamp elements for 64-bit vectors?
1827
- .moreElementsIf (
1828
- isIllegalRegisterType (VecTypeIdx),
1829
- moreElementsToNextExistingRegClass (VecTypeIdx))
1830
- // It should only be necessary with variable indexes.
1831
- // As a last resort, lower to the stack
1832
- .lower ();
1809
+ .bitcastIf (all (sizeIsMultipleOf32 (VecTypeIdx),
1810
+ scalarOrEltNarrowerThan (VecTypeIdx, 32 )),
1811
+ bitcastToVectorElement32 (VecTypeIdx))
1812
+ // .bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
1813
+ .bitcastIf (all (sizeIsMultipleOf32 (VecTypeIdx),
1814
+ scalarOrEltWiderThan (VecTypeIdx, 64 )),
1815
+ [=](const LegalityQuery &Query) {
1816
+ // For > 64-bit element types, try to turn this into a
1817
+ // 64-bit element vector since we may be able to do better
1818
+ // indexing if this is scalar. If not, fall back to 32.
1819
+ const LLT EltTy = Query.Types [EltTypeIdx];
1820
+ const LLT VecTy = Query.Types [VecTypeIdx];
1821
+ const unsigned DstEltSize = EltTy.getSizeInBits ();
1822
+ const unsigned VecSize = VecTy.getSizeInBits ();
1823
+
1824
+ const unsigned TargetEltSize =
1825
+ DstEltSize % 64 == 0 ? 64 : 32 ;
1826
+ return std::pair (VecTypeIdx,
1827
+ LLT::fixed_vector (VecSize / TargetEltSize,
1828
+ TargetEltSize));
1829
+ })
1830
+ .clampScalar (EltTypeIdx, S32, S64)
1831
+ .clampScalar (VecTypeIdx, S32, S64)
1832
+ .clampScalar (IdxTypeIdx, S32, S32)
1833
+ .clampMaxNumElements (VecTypeIdx, S32, 32 )
1834
+ // TODO: Clamp elements for 64-bit vectors?
1835
+ .moreElementsIf (isIllegalRegisterType (ST, VecTypeIdx),
1836
+ moreElementsToNextExistingRegClass (VecTypeIdx))
1837
+ // It should only be necessary with variable indexes.
1838
+ // As a last resort, lower to the stack
1839
+ .lower ();
1833
1840
}
1834
1841
1835
1842
getActionDefinitionsBuilder (G_EXTRACT_VECTOR_ELT)
@@ -1876,15 +1883,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
1876
1883
1877
1884
}
1878
1885
1879
- auto &BuildVector = getActionDefinitionsBuilder (G_BUILD_VECTOR)
1880
- . legalForCartesianProduct (AllS32Vectors, {S32} )
1881
- .legalForCartesianProduct (AllS64Vectors , {S64 })
1882
- . clampNumElements ( 0 , V16S32, V32S32 )
1883
- .clampNumElements (0 , V2S64, V16S64 )
1884
- . fewerElementsIf ( isWideVec16 ( 0 ), changeTo ( 0 , V2S16) )
1885
- . moreElementsIf (
1886
- isIllegalRegisterType (0 ),
1887
- moreElementsToNextExistingRegClass (0 ));
1886
+ auto &BuildVector =
1887
+ getActionDefinitionsBuilder (G_BUILD_VECTOR )
1888
+ .legalForCartesianProduct (AllS32Vectors , {S32 })
1889
+ . legalForCartesianProduct (AllS64Vectors, {S64} )
1890
+ .clampNumElements (0 , V16S32, V32S32 )
1891
+ . clampNumElements ( 0 , V2S64, V16S64 )
1892
+ . fewerElementsIf ( isWideVec16 ( 0 ), changeTo ( 0 , V2S16))
1893
+ . moreElementsIf ( isIllegalRegisterType (ST, 0 ),
1894
+ moreElementsToNextExistingRegClass (0 ));
1888
1895
1889
1896
if (ST.hasScalarPackInsts ()) {
1890
1897
BuildVector
@@ -1904,14 +1911,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
1904
1911
.lower ();
1905
1912
}
1906
1913
1907
- BuildVector.legalIf (isRegisterType (0 ));
1914
+ BuildVector.legalIf (isRegisterType (ST, 0 ));
1908
1915
1909
1916
// FIXME: Clamp maximum size
1910
1917
getActionDefinitionsBuilder (G_CONCAT_VECTORS)
1911
- .legalIf (all (isRegisterType (0 ), isRegisterType (1 )))
1912
- .clampMaxNumElements (0 , S32, 32 )
1913
- .clampMaxNumElements (1 , S16, 2 ) // TODO: Make 4?
1914
- .clampMaxNumElements (0 , S16, 64 );
1918
+ .legalIf (all (isRegisterType (ST, 0 ), isRegisterType (ST, 1 )))
1919
+ .clampMaxNumElements (0 , S32, 32 )
1920
+ .clampMaxNumElements (1 , S16, 2 ) // TODO: Make 4?
1921
+ .clampMaxNumElements (0 , S16, 64 );
1915
1922
1916
1923
getActionDefinitionsBuilder (G_SHUFFLE_VECTOR).lower ();
1917
1924
@@ -1932,34 +1939,40 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
1932
1939
return false ;
1933
1940
};
1934
1941
1935
- auto &Builder = getActionDefinitionsBuilder (Op)
1936
- .legalIf (all (isRegisterType (0 ), isRegisterType (1 )))
1937
- .lowerFor ({{S16, V2S16}})
1938
- .lowerIf ([=](const LegalityQuery &Query) {
1939
- const LLT BigTy = Query.Types [BigTyIdx];
1940
- return BigTy.getSizeInBits () == 32 ;
1941
- })
1942
- // Try to widen to s16 first for small types.
1943
- // TODO: Only do this on targets with legal s16 shifts
1944
- .minScalarOrEltIf (scalarNarrowerThan (LitTyIdx, 16 ), LitTyIdx, S16)
1945
- .widenScalarToNextPow2 (LitTyIdx, /* Min*/ 16 )
1946
- .moreElementsIf (isSmallOddVector (BigTyIdx), oneMoreElement (BigTyIdx))
1947
- .fewerElementsIf (all (typeIs (0 , S16), vectorWiderThan (1 , 32 ),
1948
- elementTypeIs (1 , S16)),
1949
- changeTo (1 , V2S16))
1950
- // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
1951
- // worth considering the multiples of 64 since 2*192 and 2*384 are not
1952
- // valid.
1953
- .clampScalar (LitTyIdx, S32, S512)
1954
- .widenScalarToNextPow2 (LitTyIdx, /* Min*/ 32 )
1955
- // Break up vectors with weird elements into scalars
1956
- .fewerElementsIf (
1957
- [=](const LegalityQuery &Query) { return notValidElt (Query, LitTyIdx); },
1958
- scalarize (0 ))
1959
- .fewerElementsIf (
1960
- [=](const LegalityQuery &Query) { return notValidElt (Query, BigTyIdx); },
1961
- scalarize (1 ))
1962
- .clampScalar (BigTyIdx, S32, MaxScalar);
1942
+ auto &Builder =
1943
+ getActionDefinitionsBuilder (Op)
1944
+ .legalIf (all (isRegisterType (ST, 0 ), isRegisterType (ST, 1 )))
1945
+ .lowerFor ({{S16, V2S16}})
1946
+ .lowerIf ([=](const LegalityQuery &Query) {
1947
+ const LLT BigTy = Query.Types [BigTyIdx];
1948
+ return BigTy.getSizeInBits () == 32 ;
1949
+ })
1950
+ // Try to widen to s16 first for small types.
1951
+ // TODO: Only do this on targets with legal s16 shifts
1952
+ .minScalarOrEltIf (scalarNarrowerThan (LitTyIdx, 16 ), LitTyIdx, S16)
1953
+ .widenScalarToNextPow2 (LitTyIdx, /* Min*/ 16 )
1954
+ .moreElementsIf (isSmallOddVector (BigTyIdx),
1955
+ oneMoreElement (BigTyIdx))
1956
+ .fewerElementsIf (all (typeIs (0 , S16), vectorWiderThan (1 , 32 ),
1957
+ elementTypeIs (1 , S16)),
1958
+ changeTo (1 , V2S16))
1959
+ // Clamp the little scalar to s32-s512 and make it a power of 2. It's
1960
+ // not worth considering the multiples of 64 since 2*192 and 2*384
1961
+ // are not valid.
1962
+ .clampScalar (LitTyIdx, S32, S512)
1963
+ .widenScalarToNextPow2 (LitTyIdx, /* Min*/ 32 )
1964
+ // Break up vectors with weird elements into scalars
1965
+ .fewerElementsIf (
1966
+ [=](const LegalityQuery &Query) {
1967
+ return notValidElt (Query, LitTyIdx);
1968
+ },
1969
+ scalarize (0 ))
1970
+ .fewerElementsIf (
1971
+ [=](const LegalityQuery &Query) {
1972
+ return notValidElt (Query, BigTyIdx);
1973
+ },
1974
+ scalarize (1 ))
1975
+ .clampScalar (BigTyIdx, S32, MaxScalar);
1963
1976
1964
1977
if (Op == G_MERGE_VALUES) {
1965
1978
Builder.widenScalarIf (
@@ -3146,7 +3159,7 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
3146
3159
} else {
3147
3160
// Extract the subvector.
3148
3161
3149
- if (isRegisterType (ValTy)) {
3162
+ if (isRegisterType (ST, ValTy)) {
3150
3163
// If this is a case where G_EXTRACT is legal, use it.
3151
3164
// (e.g. <3 x s32> -> <4 x s32>)
3152
3165
WideLoad = B.buildLoadFromOffset (WideTy, PtrReg, *MMO, 0 ).getReg (0 );
0 commit comments