@@ -27,7 +27,7 @@ def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
"16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
- // X86 Subtarget features
+ // X86 Subtarget ISA features
//===----------------------------------------------------------------------===//

def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
@@ -100,20 +100,6 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[FeatureCMPXCHG8B]>;
- def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
- "SHLD instruction is slow">;
- def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
- "PMULLD instruction is slow">;
- def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
- "true",
- "PMADDWD is slower than PMULLD">;
- // FIXME: This should not apply to CPUs that do not have SSE.
- def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
- "IsUAMem16Slow", "true",
- "Slow unaligned 16-byte memory access">;
- def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
- "IsUAMem32Slow", "true",
- "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
@@ -255,17 +241,6 @@ def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
"Support AMX-BF16 instructions",
[FeatureAMXTILE]>;
- def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
- "Use LEA for adjusting the stack pointer">;
- def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
- "HasSlowDivide32", "true",
- "Use 8-bit divide for positive values less than 256">;
- def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
- "HasSlowDivide64", "true",
- "Use 32-bit divide for positive values less than 2^32">;
- def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
- "PadShortFunctions", "true",
- "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
@@ -296,31 +271,163 @@ def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
"Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
"Has UINTR Instructions">;
+ def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
+ "platform configuration instruction">;
+ def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
+ "Support movdiri instruction">;
+ def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
+ "Support movdir64b instruction">;
+
+ // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
+ // "string operations"). See "REP String Enhancement" in the Intel Software
+ // Development Manual. This feature essentially means that REP MOVSB will copy
+ // using the largest available size instead of copying bytes one by one, making
+ // it at least as fast as REPMOVS{W,D,Q}.
+ def FeatureERMSB
+ : SubtargetFeature<
+ "ermsb", "HasERMSB", "true",
+ "REP MOVS/STOS are fast">;
+
+ // Icelake and newer processors have Fast Short REP MOV.
+ def FeatureFSRM
+ : SubtargetFeature<
+ "fsrm", "HasFSRM", "true",
+ "REP MOVSB of short lengths is faster">;
+
+ def FeatureSoftFloat
+ : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software floating point features">;
+
+ //===----------------------------------------------------------------------===//
+ // X86 Subtarget Security Mitigation features
+ //===----------------------------------------------------------------------===//
+
+ // Lower indirect calls using a special construct called a `retpoline` to
+ // mitigate potential Spectre v2 attacks against them.
+ def FeatureRetpolineIndirectCalls
+ : SubtargetFeature<
+ "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
+ "Remove speculation of indirect calls from the generated code">;
+
+ // Lower indirect branches and switches either using conditional branch trees
+ // or using a special construct called a `retpoline` to mitigate potential
+ // Spectre v2 attacks against them.
+ def FeatureRetpolineIndirectBranches
+ : SubtargetFeature<
+ "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
+ "Remove speculation of indirect branches from the generated code">;
+
+ // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
+ // `retpoline-indirect-branches` above.
+ def FeatureRetpoline
+ : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
+ "Remove speculation of indirect branches from the "
+ "generated code, either by avoiding them entirely or "
+ "lowering them with a speculation blocking construct",
+ [FeatureRetpolineIndirectCalls,
+ FeatureRetpolineIndirectBranches]>;
+
+ // Rely on external thunks for the emitted retpoline calls. This allows users
+ // to provide their own custom thunk definitions in highly specialized
+ // environments such as a kernel that does boot-time hot patching.
+ def FeatureRetpolineExternalThunk
+ : SubtargetFeature<
+ "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
+ "When lowering an indirect call or branch using a `retpoline`, rely "
+ "on the specified user provided thunk rather than emitting one "
+ "ourselves. Only has effect when combined with some other retpoline "
+ "feature", [FeatureRetpolineIndirectCalls]>;
+
+ // Mitigate LVI attacks against indirect calls/branches and call returns
+ def FeatureLVIControlFlowIntegrity
+ : SubtargetFeature<
+ "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+ "Prevent indirect calls/branches from using a memory operand, and "
+ "precede all indirect calls/branches from a register with an "
+ "LFENCE instruction to serialize control flow. Also decompose RET "
+ "instructions into a POP+LFENCE+JMP sequence.">;
+
+ // Enable SESES to mitigate speculative execution attacks
+ def FeatureSpeculativeExecutionSideEffectSuppression
+ : SubtargetFeature<
+ "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
+ "Prevent speculative execution side channel timing attacks by "
+ "inserting a speculation barrier before memory reads, memory writes, "
+ "and conditional branches. Implies LVI Control Flow integrity.",
+ [FeatureLVIControlFlowIntegrity]>;
+
+ // Mitigate LVI attacks against data loads
+ def FeatureLVILoadHardening
+ : SubtargetFeature<
+ "lvi-load-hardening", "UseLVILoadHardening", "true",
+ "Insert LFENCE instructions to prevent data speculatively injected "
+ "into loads from being used maliciously.">;
+
+ //===----------------------------------------------------------------------===//
+ // X86 Subtarget Tuning features
+ //===----------------------------------------------------------------------===//
+
+ def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
+ "SHLD instruction is slow">;
+
+ def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
+ "PMULLD instruction is slow">;
+
+ def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
+ "true",
+ "PMADDWD is slower than PMULLD">;
+
+ // FIXME: This should not apply to CPUs that do not have SSE.
+ def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
+ "IsUAMem16Slow", "true",
+ "Slow unaligned 16-byte memory access">;
+
+ def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
+ "IsUAMem32Slow", "true",
+ "Slow unaligned 32-byte memory access">;
+
+ def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+ "Use LEA for adjusting the stack pointer">;
+
+ def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+ "HasSlowDivide32", "true",
+ "Use 8-bit divide for positive values less than 256">;
+
+ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
+ "HasSlowDivide64", "true",
+ "Use 32-bit divide for positive values less than 2^32">;
+
+ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
+ "PadShortFunctions", "true",
+ "Pad short functions">;
+
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
+
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
+
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
+
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
+
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
- def FeatureSoftFloat
- : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
- "Use software floating point features">;
+
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
+
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
- def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
- "platform configuration instruction">;
+
// On recent X86 (port bound) processors, its preferable to combine to a single shuffle
// using a variable mask over multiple fixed shuffles.
def FeatureFastVariableCrossLaneShuffle
@@ -338,6 +445,7 @@ def FeatureInsertVZEROUPPER
: SubtargetFeature<"vzeroupper",
"InsertVZEROUPPER",
"true", "Should insert vzeroupper instructions">;
+
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
@@ -351,27 +459,32 @@ def FeatureFastScalarFSQRT
def FeatureFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
+
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def FeatureFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
+
// If the target can efficiently decode NOPs upto 7-bytes in length.
def FeatureFast7ByteNOP
: SubtargetFeature<
"fast-7bytenop", "HasFast7ByteNOP", "true",
"Target can quickly decode up to 7 byte NOPs">;
+
// If the target can efficiently decode NOPs upto 11-bytes in length.
def FeatureFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
+
// If the target can efficiently decode NOPs upto 15-bytes in length.
def FeatureFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
+
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
@@ -380,22 +493,6 @@ def FeatureFastSHLDRotate
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;

- // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
- // "string operations"). See "REP String Enhancement" in the Intel Software
- // Development Manual. This feature essentially means that REP MOVSB will copy
- // using the largest available size instead of copying bytes one by one, making
- // it at least as fast as REPMOVS{W,D,Q}.
- def FeatureERMSB
- : SubtargetFeature<
- "ermsb", "HasERMSB", "true",
- "REP MOVS/STOS are fast">;
-
- // Icelake and newer processors have Fast Short REP MOV.
- def FeatureFSRM
- : SubtargetFeature<
- "fsrm", "HasFSRM", "true",
- "REP MOVSB of short lengths is faster">;
-
// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def FeatureBranchFusion
@@ -429,73 +526,6 @@ def FeaturePreferMaskRegisters
: SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
"Prefer AVX512 mask registers over PTEST/MOVMSK">;

- // Lower indirect calls using a special construct called a `retpoline` to
- // mitigate potential Spectre v2 attacks against them.
- def FeatureRetpolineIndirectCalls
- : SubtargetFeature<
- "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
- "Remove speculation of indirect calls from the generated code">;
-
- // Lower indirect branches and switches either using conditional branch trees
- // or using a special construct called a `retpoline` to mitigate potential
- // Spectre v2 attacks against them.
- def FeatureRetpolineIndirectBranches
- : SubtargetFeature<
- "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
- "Remove speculation of indirect branches from the generated code">;
-
- // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
- // `retpoline-indirect-branches` above.
- def FeatureRetpoline
- : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
- "Remove speculation of indirect branches from the "
- "generated code, either by avoiding them entirely or "
- "lowering them with a speculation blocking construct",
- [FeatureRetpolineIndirectCalls,
- FeatureRetpolineIndirectBranches]>;
-
- // Rely on external thunks for the emitted retpoline calls. This allows users
- // to provide their own custom thunk definitions in highly specialized
- // environments such as a kernel that does boot-time hot patching.
- def FeatureRetpolineExternalThunk
- : SubtargetFeature<
- "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
- "When lowering an indirect call or branch using a `retpoline`, rely "
- "on the specified user provided thunk rather than emitting one "
- "ourselves. Only has effect when combined with some other retpoline "
- "feature", [FeatureRetpolineIndirectCalls]>;
-
- // Mitigate LVI attacks against indirect calls/branches and call returns
- def FeatureLVIControlFlowIntegrity
- : SubtargetFeature<
- "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
- "Prevent indirect calls/branches from using a memory operand, and "
- "precede all indirect calls/branches from a register with an "
- "LFENCE instruction to serialize control flow. Also decompose RET "
- "instructions into a POP+LFENCE+JMP sequence.">;
-
- // Enable SESES to mitigate speculative execution attacks
- def FeatureSpeculativeExecutionSideEffectSuppression
- : SubtargetFeature<
- "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
- "Prevent speculative execution side channel timing attacks by "
- "inserting a speculation barrier before memory reads, memory writes, "
- "and conditional branches. Implies LVI Control Flow integrity.",
- [FeatureLVIControlFlowIntegrity]>;
-
- // Mitigate LVI attacks against data loads
- def FeatureLVILoadHardening
- : SubtargetFeature<
- "lvi-load-hardening", "UseLVILoadHardening", "true",
- "Insert LFENCE instructions to prevent data speculatively injected "
- "into loads from being used maliciously.">;
-
- // Direct Move instructions.
- def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
- "Support movdiri instruction">;
- def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
- "Support movdir64b instruction">;
-
def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
"Indicates that the BEXTR instruction is implemented as a single uop "
"with good throughput">;