@@ -215,19 +215,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
215
215
.legalFor ({s64, v8s16, v16s8, v4s32})
216
216
.lower ();
217
217
218
- auto &MinMaxActions = getActionDefinitionsBuilder (
219
- {G_SMIN, G_SMAX, G_UMIN, G_UMAX});
220
- if (HasCSSC)
221
- MinMaxActions
222
- .legalFor ({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
223
- // Making clamping conditional on CSSC extension as without legal types we
224
- // lower to CMP which can fold one of the two sxtb's we'd otherwise need
225
- // if we detect a type smaller than 32-bit.
226
- .minScalar (0 , s32);
227
- else
228
- MinMaxActions
229
- .legalFor ({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32});
230
- MinMaxActions
218
+ getActionDefinitionsBuilder ({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
219
+ .legalFor ({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
220
+ .legalFor (HasCSSC, {s32, s64})
221
+ .minScalar (HasCSSC, 0 , s32)
231
222
.clampNumElements (0 , v8s8, v16s8)
232
223
.clampNumElements (0 , v4s16, v8s16)
233
224
.clampNumElements (0 , v2s32, v4s32)
@@ -247,11 +238,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
247
238
{G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FSQRT, G_FMAXNUM, G_FMINNUM,
248
239
G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT, G_FNEARBYINT,
249
240
G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
250
- .legalFor ({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
251
- .legalIf ([=](const LegalityQuery &Query) {
252
- const auto &Ty = Query.Types [0 ];
253
- return (Ty == v8s16 || Ty == v4s16) && HasFP16;
254
- })
241
+ .legalFor ({s32, s64, v2s32, v4s32, v2s64})
242
+ .legalFor (HasFP16, {s16, v4s16, v8s16})
255
243
.libcallFor ({s128})
256
244
.scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
257
245
.minScalarOrElt (0 , MinFPScalar)
@@ -261,11 +249,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
261
249
.moreElementsToNextPow2 (0 );
262
250
263
251
getActionDefinitionsBuilder ({G_FABS, G_FNEG})
264
- .legalFor ({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
265
- .legalIf ([=](const LegalityQuery &Query) {
266
- const auto &Ty = Query.Types [0 ];
267
- return (Ty == v8s16 || Ty == v4s16) && HasFP16;
268
- })
252
+ .legalFor ({s32, s64, v2s32, v4s32, v2s64})
253
+ .legalFor (HasFP16, {s16, v4s16, v8s16})
269
254
.scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
270
255
.lowerIf (scalarOrEltWiderThan (0 , 64 ))
271
256
.clampNumElements (0 , v4s16, v8s16)
@@ -350,31 +335,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
350
335
return ValTy.isPointerVector () && ValTy.getAddressSpace () == 0 ;
351
336
};
352
337
353
- auto &LoadActions = getActionDefinitionsBuilder (G_LOAD);
354
- auto &StoreActions = getActionDefinitionsBuilder (G_STORE);
355
-
356
- if (ST.hasSVE ()) {
357
- LoadActions.legalForTypesWithMemDesc ({
358
- // 128 bit base sizes
359
- {nxv16s8, p0, nxv16s8, 8 },
360
- {nxv8s16, p0, nxv8s16, 8 },
361
- {nxv4s32, p0, nxv4s32, 8 },
362
- {nxv2s64, p0, nxv2s64, 8 },
363
- });
364
-
365
- // TODO: Add nxv2p0. Consider bitcastIf.
366
- // See #92130
367
- // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
368
- StoreActions.legalForTypesWithMemDesc ({
369
- // 128 bit base sizes
370
- {nxv16s8, p0, nxv16s8, 8 },
371
- {nxv8s16, p0, nxv8s16, 8 },
372
- {nxv4s32, p0, nxv4s32, 8 },
373
- {nxv2s64, p0, nxv2s64, 8 },
374
- });
375
- }
376
-
377
- LoadActions
338
+ getActionDefinitionsBuilder (G_LOAD)
378
339
.customIf ([=](const LegalityQuery &Query) {
379
340
return HasRCPC3 && Query.Types [0 ] == s128 &&
380
341
Query.MMODescrs [0 ].Ordering == AtomicOrdering::Acquire;
@@ -399,6 +360,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
399
360
// These extends are also legal
400
361
.legalForTypesWithMemDesc (
401
362
{{s32, p0, s8, 8 }, {s32, p0, s16, 8 }, {s64, p0, s32, 8 }})
363
+ .legalForTypesWithMemDesc ({
364
+ // SVE vscale x 128 bit base sizes
365
+ {nxv16s8, p0, nxv16s8, 8 },
366
+ {nxv8s16, p0, nxv8s16, 8 },
367
+ {nxv4s32, p0, nxv4s32, 8 },
368
+ {nxv2s64, p0, nxv2s64, 8 },
369
+ })
402
370
.widenScalarToNextPow2 (0 , /* MinSize = */ 8 )
403
371
.clampMaxNumElements (0 , s8, 16 )
404
372
.clampMaxNumElements (0 , s16, 8 )
@@ -425,7 +393,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
425
393
.scalarizeIf (typeInSet (0 , {v2s16, v2s8}), 0 )
426
394
.scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 );
427
395
428
- StoreActions
396
+ getActionDefinitionsBuilder (G_STORE)
429
397
.customIf ([=](const LegalityQuery &Query) {
430
398
return HasRCPC3 && Query.Types [0 ] == s128 &&
431
399
Query.MMODescrs [0 ].Ordering == AtomicOrdering::Release;
@@ -445,6 +413,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
445
413
{p0, p0, s64, 8 }, {s128, p0, s128, 8 }, {v16s8, p0, s128, 8 },
446
414
{v8s8, p0, s64, 8 }, {v4s16, p0, s64, 8 }, {v8s16, p0, s128, 8 },
447
415
{v2s32, p0, s64, 8 }, {v4s32, p0, s128, 8 }, {v2s64, p0, s128, 8 }})
416
+ .legalForTypesWithMemDesc ({
417
+ // SVE vscale x 128 bit base sizes
418
+ // TODO: Add nxv2p0. Consider bitcastIf.
419
+ // See #92130
420
+ // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
421
+ {nxv16s8, p0, nxv16s8, 8 },
422
+ {nxv8s16, p0, nxv8s16, 8 },
423
+ {nxv4s32, p0, nxv4s32, 8 },
424
+ {nxv2s64, p0, nxv2s64, 8 },
425
+ })
448
426
.clampScalar (0 , s8, s64)
449
427
.lowerIf ([=](const LegalityQuery &Query) {
450
428
return Query.Types [0 ].isScalar () &&
@@ -532,12 +510,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
532
510
.widenScalarToNextPow2 (0 )
533
511
.clampScalar (0 , s8, s64);
534
512
getActionDefinitionsBuilder (G_FCONSTANT)
535
- .legalIf ([=](const LegalityQuery &Query) {
536
- const auto &Ty = Query.Types [0 ];
537
- if (HasFP16 && Ty == s16)
538
- return true ;
539
- return Ty == s32 || Ty == s64 || Ty == s128;
540
- })
513
+ .legalFor ({s32, s64, s128})
514
+ .legalFor (HasFP16, {s16})
541
515
.clampScalar (0 , MinFPScalar, s128);
542
516
543
517
// FIXME: fix moreElementsToNextPow2
@@ -569,16 +543,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
569
543
.customIf (isVector (0 ));
570
544
571
545
getActionDefinitionsBuilder (G_FCMP)
572
- .legalFor ({{s32, MinFPScalar},
573
- {s32, s32},
546
+ .legalFor ({{s32, s32},
574
547
{s32, s64},
575
548
{v4s32, v4s32},
576
549
{v2s32, v2s32},
577
550
{v2s64, v2s64}})
578
- .legalIf ([=](const LegalityQuery &Query) {
579
- const auto &Ty = Query.Types [1 ];
580
- return (Ty == v8s16 || Ty == v4s16) && Ty == Query.Types [0 ] && HasFP16;
581
- })
551
+ .legalFor (HasFP16, {{s32, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
582
552
.widenScalarOrEltToNextPow2 (1 )
583
553
.clampScalar (0 , s32, s32)
584
554
.minScalarOrElt (1 , MinFPScalar)
@@ -693,13 +663,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
693
663
{v2s64, v2s64},
694
664
{v4s32, v4s32},
695
665
{v2s32, v2s32}})
696
- .legalIf ([=](const LegalityQuery &Query) {
697
- return HasFP16 &&
698
- (Query.Types [1 ] == s16 || Query.Types [1 ] == v4s16 ||
699
- Query.Types [1 ] == v8s16) &&
700
- (Query.Types [0 ] == s32 || Query.Types [0 ] == s64 ||
701
- Query.Types [0 ] == v4s16 || Query.Types [0 ] == v8s16);
702
- })
666
+ .legalFor (HasFP16,
667
+ {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
703
668
.scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
704
669
.scalarizeIf (scalarOrEltWiderThan (1 , 64 ), 1 )
705
670
// The range of a fp16 value fits into an i17, so we can lower the width
@@ -741,13 +706,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
741
706
{v2s64, v2s64},
742
707
{v4s32, v4s32},
743
708
{v2s32, v2s32}})
744
- .legalIf ([=](const LegalityQuery &Query) {
745
- return HasFP16 &&
746
- (Query.Types [1 ] == s16 || Query.Types [1 ] == v4s16 ||
747
- Query.Types [1 ] == v8s16) &&
748
- (Query.Types [0 ] == s32 || Query.Types [0 ] == s64 ||
749
- Query.Types [0 ] == v4s16 || Query.Types [0 ] == v8s16);
750
- })
709
+ .legalFor (HasFP16,
710
+ {{s32, s16}, {s64, s16}, {v4s16, v4s16}, {v8s16, v8s16}})
751
711
// Handle types larger than i64 by scalarizing/lowering.
752
712
.scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
753
713
.scalarizeIf (scalarOrEltWiderThan (1 , 64 ), 1 )
@@ -790,13 +750,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
790
750
{v2s64, v2s64},
791
751
{v4s32, v4s32},
792
752
{v2s32, v2s32}})
793
- .legalIf ([=](const LegalityQuery &Query) {
794
- return HasFP16 &&
795
- (Query.Types [0 ] == s16 || Query.Types [0 ] == v4s16 ||
796
- Query.Types [0 ] == v8s16) &&
797
- (Query.Types [1 ] == s32 || Query.Types [1 ] == s64 ||
798
- Query.Types [1 ] == v4s16 || Query.Types [1 ] == v8s16);
799
- })
753
+ .legalFor (HasFP16,
754
+ {{s16, s32}, {s16, s64}, {v4s16, v4s16}, {v8s16, v8s16}})
800
755
.scalarizeIf (scalarOrEltWiderThan (1 , 64 ), 1 )
801
756
.scalarizeIf (scalarOrEltWiderThan (0 , 64 ), 0 )
802
757
.moreElementsToNextPow2 (1 )
@@ -1050,12 +1005,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1050
1005
.widenScalarToNextPow2 (1 , /* Min=*/ 32 )
1051
1006
.clampScalar (1 , s32, s64)
1052
1007
.scalarSameSizeAs (0 , 1 )
1053
- .legalIf ([=](const LegalityQuery &Query) {
1054
- return (HasCSSC && typeInSet (0 , {s32, s64})(Query));
1055
- })
1056
- .customIf ([=](const LegalityQuery &Query) {
1057
- return (!HasCSSC && typeInSet (0 , {s32, s64})(Query));
1058
- });
1008
+ .legalFor (HasCSSC, {s32, s64})
1009
+ .customFor (!HasCSSC, {s32, s64});
1059
1010
1060
1011
getActionDefinitionsBuilder (G_SHUFFLE_VECTOR)
1061
1012
.legalIf ([=](const LegalityQuery &Query) {
@@ -1143,11 +1094,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1143
1094
}
1144
1095
1145
1096
// FIXME: Legal vector types are only legal with NEON.
1146
- auto &ABSActions = getActionDefinitionsBuilder (G_ABS);
1147
- if (HasCSSC)
1148
- ABSActions
1149
- .legalFor ({s32, s64});
1150
- ABSActions.legalFor (PackedVectorAllTypeList)
1097
+ getActionDefinitionsBuilder (G_ABS)
1098
+ .legalFor (HasCSSC, {s32, s64})
1099
+ .legalFor (PackedVectorAllTypeList)
1151
1100
.customIf ([=](const LegalityQuery &Q) {
1152
1101
// TODO: Fix suboptimal codegen for 128+ bit types.
1153
1102
LLT SrcTy = Q.Types [0 ];
@@ -1171,10 +1120,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1171
1120
// later.
1172
1121
getActionDefinitionsBuilder (G_VECREDUCE_FADD)
1173
1122
.legalFor ({{s32, v2s32}, {s32, v4s32}, {s64, v2s64}})
1174
- .legalIf ([=](const LegalityQuery &Query) {
1175
- const auto &Ty = Query.Types [1 ];
1176
- return (Ty == v4s16 || Ty == v8s16) && HasFP16;
1177
- })
1123
+ .legalFor (HasFP16, {{s16, v4s16}, {s16, v8s16}})
1178
1124
.minScalarOrElt (0 , MinFPScalar)
1179
1125
.clampMaxNumElements (1 , s64, 2 )
1180
1126
.clampMaxNumElements (1 , s32, 4 )
@@ -1215,10 +1161,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1215
1161
getActionDefinitionsBuilder ({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX,
1216
1162
G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM})
1217
1163
.legalFor ({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}})
1218
- .legalIf ([=](const LegalityQuery &Query) {
1219
- const auto &Ty = Query.Types [1 ];
1220
- return Query.Types [0 ] == s16 && (Ty == v8s16 || Ty == v4s16) && HasFP16;
1221
- })
1164
+ .legalFor (HasFP16, {{s16, v4s16}, {s16, v8s16}})
1222
1165
.minScalarOrElt (0 , MinFPScalar)
1223
1166
.clampMaxNumElements (1 , s64, 2 )
1224
1167
.clampMaxNumElements (1 , s32, 4 )
@@ -1295,32 +1238,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
1295
1238
.customFor ({{s32, s32}, {s64, s64}});
1296
1239
1297
1240
auto always = [=](const LegalityQuery &Q) { return true ; };
1298
- auto &CTPOPActions = getActionDefinitionsBuilder (G_CTPOP);
1299
- if (HasCSSC)
1300
- CTPOPActions
1301
- .legalFor ({{s32, s32},
1302
- {s64, s64},
1303
- {v8s8, v8s8},
1304
- {v16s8, v16s8}})
1305
- .customFor ({{s128, s128},
1306
- {v2s64, v2s64},
1307
- {v2s32, v2s32},
1308
- {v4s32, v4s32},
1309
- {v4s16, v4s16},
1310
- {v8s16, v8s16}});
1311
- else
1312
- CTPOPActions
1313
- .legalFor ({{v8s8, v8s8},
1314
- {v16s8, v16s8}})
1315
- .customFor ({{s32, s32},
1316
- {s64, s64},
1317
- {s128, s128},
1318
- {v2s64, v2s64},
1319
- {v2s32, v2s32},
1320
- {v4s32, v4s32},
1321
- {v4s16, v4s16},
1322
- {v8s16, v8s16}});
1323
- CTPOPActions
1241
+ getActionDefinitionsBuilder (G_CTPOP)
1242
+ .legalFor (HasCSSC, {{s32, s32}, {s64, s64}})
1243
+ .legalFor ({{v8s8, v8s8}, {v16s8, v16s8}})
1244
+ .customFor (!HasCSSC, {{s32, s32}, {s64, s64}})
1245
+ .customFor ({{s128, s128},
1246
+ {v2s64, v2s64},
1247
+ {v2s32, v2s32},
1248
+ {v4s32, v4s32},
1249
+ {v4s16, v4s16},
1250
+ {v8s16, v8s16}})
1324
1251
.clampScalar (0 , s32, s128)
1325
1252
.widenScalarToNextPow2 (0 )
1326
1253
.minScalarEltSameAsIf (always, 1 , 0 )
0 commit comments