Skip to content

Commit 7d40ea8

Browse files
committed
[RISCV] Enable the TypePromotion pass from AArch64/ARM.
This pass looks for unsigned icmps that have illegal types and tries to widen the use/def graph to improve the placement of the zero extends that type legalization would otherwise need to insert. I've explicitly disabled it for i32 by adding a check for isSExtCheaperThanZExt to the pass. The generated code isn't perfect, but my data shows a net dynamic instruction count improvement on SPEC CPU 2017 for both the base ISA and Zba+Zbb+Zbs.
1 parent 9838c85 commit 7d40ea8

File tree

7 files changed

+190
-87
lines changed

7 files changed

+190
-87
lines changed

llvm/lib/CodeGen/TypePromotion.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -937,6 +937,8 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
937937
return 0;
938938

939939
EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
940+
if (TLI->isSExtCheaperThanZExt(SrcVT, PromotedVT))
941+
return 0;
940942
if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
941943
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
942944
<< "for promoted type\n");

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ class RISCVPassConfig : public TargetPassConfig {
366366

367367
void addIRPasses() override;
368368
bool addPreISel() override;
369+
void addCodeGenPrepare() override;
369370
bool addInstSelector() override;
370371
bool addIRTranslator() override;
371372
void addPreLegalizeMachineIR() override;
@@ -452,6 +453,12 @@ bool RISCVPassConfig::addPreISel() {
452453
return false;
453454
}
454455

456+
void RISCVPassConfig::addCodeGenPrepare() {
457+
if (getOptLevel() != CodeGenOptLevel::None)
458+
addPass(createTypePromotionLegacyPass());
459+
TargetPassConfig::addCodeGenPrepare();
460+
}
461+
455462
bool RISCVPassConfig::addInstSelector() {
456463
addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel()));
457464

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
; CHECK-NEXT: Expand reduction intrinsics
6969
; CHECK-NEXT: Natural Loop Information
7070
; CHECK-NEXT: TLS Variable Hoist
71+
; CHECK-NEXT: Type Promotion
7172
; CHECK-NEXT: CodeGen Prepare
7273
; CHECK-NEXT: Dominator Tree Construction
7374
; CHECK-NEXT: Exception handling preparation

llvm/test/CodeGen/RISCV/lack-of-signed-truncation-check.ll

Lines changed: 64 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -254,21 +254,39 @@ define i1 @shifts_necmp_i64_i8(i64 %x) nounwind {
254254
; ---------------------------------------------------------------------------- ;
255255

256256
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
257-
; RV32-LABEL: add_ultcmp_i16_i8:
258-
; RV32: # %bb.0:
259-
; RV32-NEXT: addi a0, a0, -128
260-
; RV32-NEXT: slli a0, a0, 16
261-
; RV32-NEXT: srli a0, a0, 24
262-
; RV32-NEXT: sltiu a0, a0, 255
263-
; RV32-NEXT: ret
257+
; RV32I-LABEL: add_ultcmp_i16_i8:
258+
; RV32I: # %bb.0:
259+
; RV32I-NEXT: slli a0, a0, 16
260+
; RV32I-NEXT: srli a0, a0, 16
261+
; RV32I-NEXT: addi a0, a0, -128
262+
; RV32I-NEXT: srli a0, a0, 8
263+
; RV32I-NEXT: sltiu a0, a0, 255
264+
; RV32I-NEXT: ret
264265
;
265-
; RV64-LABEL: add_ultcmp_i16_i8:
266-
; RV64: # %bb.0:
267-
; RV64-NEXT: addi a0, a0, -128
268-
; RV64-NEXT: slli a0, a0, 48
269-
; RV64-NEXT: srli a0, a0, 56
270-
; RV64-NEXT: sltiu a0, a0, 255
271-
; RV64-NEXT: ret
266+
; RV64I-LABEL: add_ultcmp_i16_i8:
267+
; RV64I: # %bb.0:
268+
; RV64I-NEXT: slli a0, a0, 48
269+
; RV64I-NEXT: srli a0, a0, 48
270+
; RV64I-NEXT: addi a0, a0, -128
271+
; RV64I-NEXT: srli a0, a0, 8
272+
; RV64I-NEXT: sltiu a0, a0, 255
273+
; RV64I-NEXT: ret
274+
;
275+
; RV32ZBB-LABEL: add_ultcmp_i16_i8:
276+
; RV32ZBB: # %bb.0:
277+
; RV32ZBB-NEXT: zext.h a0, a0
278+
; RV32ZBB-NEXT: addi a0, a0, -128
279+
; RV32ZBB-NEXT: srli a0, a0, 8
280+
; RV32ZBB-NEXT: sltiu a0, a0, 255
281+
; RV32ZBB-NEXT: ret
282+
;
283+
; RV64ZBB-LABEL: add_ultcmp_i16_i8:
284+
; RV64ZBB: # %bb.0:
285+
; RV64ZBB-NEXT: zext.h a0, a0
286+
; RV64ZBB-NEXT: addi a0, a0, -128
287+
; RV64ZBB-NEXT: srli a0, a0, 8
288+
; RV64ZBB-NEXT: sltiu a0, a0, 255
289+
; RV64ZBB-NEXT: ret
272290
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
273291
%tmp1 = icmp ult i16 %tmp0, -256 ; ~0U << 8
274292
ret i1 %tmp1
@@ -421,21 +439,39 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
421439

422440
; Slightly more canonical variant
423441
define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
424-
; RV32-LABEL: add_ulecmp_i16_i8:
425-
; RV32: # %bb.0:
426-
; RV32-NEXT: addi a0, a0, -128
427-
; RV32-NEXT: slli a0, a0, 16
428-
; RV32-NEXT: srli a0, a0, 24
429-
; RV32-NEXT: sltiu a0, a0, 255
430-
; RV32-NEXT: ret
442+
; RV32I-LABEL: add_ulecmp_i16_i8:
443+
; RV32I: # %bb.0:
444+
; RV32I-NEXT: slli a0, a0, 16
445+
; RV32I-NEXT: srli a0, a0, 16
446+
; RV32I-NEXT: addi a0, a0, -128
447+
; RV32I-NEXT: srli a0, a0, 8
448+
; RV32I-NEXT: sltiu a0, a0, 255
449+
; RV32I-NEXT: ret
431450
;
432-
; RV64-LABEL: add_ulecmp_i16_i8:
433-
; RV64: # %bb.0:
434-
; RV64-NEXT: addi a0, a0, -128
435-
; RV64-NEXT: slli a0, a0, 48
436-
; RV64-NEXT: srli a0, a0, 56
437-
; RV64-NEXT: sltiu a0, a0, 255
438-
; RV64-NEXT: ret
451+
; RV64I-LABEL: add_ulecmp_i16_i8:
452+
; RV64I: # %bb.0:
453+
; RV64I-NEXT: slli a0, a0, 48
454+
; RV64I-NEXT: srli a0, a0, 48
455+
; RV64I-NEXT: addi a0, a0, -128
456+
; RV64I-NEXT: srli a0, a0, 8
457+
; RV64I-NEXT: sltiu a0, a0, 255
458+
; RV64I-NEXT: ret
459+
;
460+
; RV32ZBB-LABEL: add_ulecmp_i16_i8:
461+
; RV32ZBB: # %bb.0:
462+
; RV32ZBB-NEXT: zext.h a0, a0
463+
; RV32ZBB-NEXT: addi a0, a0, -128
464+
; RV32ZBB-NEXT: srli a0, a0, 8
465+
; RV32ZBB-NEXT: sltiu a0, a0, 255
466+
; RV32ZBB-NEXT: ret
467+
;
468+
; RV64ZBB-LABEL: add_ulecmp_i16_i8:
469+
; RV64ZBB: # %bb.0:
470+
; RV64ZBB-NEXT: zext.h a0, a0
471+
; RV64ZBB-NEXT: addi a0, a0, -128
472+
; RV64ZBB-NEXT: srli a0, a0, 8
473+
; RV64ZBB-NEXT: sltiu a0, a0, 255
474+
; RV64ZBB-NEXT: ret
439475
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
440476
%tmp1 = icmp ule i16 %tmp0, -257 ; ~0U << 8 - 1
441477
ret i1 %tmp1

llvm/test/CodeGen/RISCV/signbit-test.ll

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind {
303303
; RV32-NEXT: bnez a1, .LBB10_2
304304
; RV32-NEXT: # %bb.1: # %t
305305
; RV32-NEXT: li a0, 42
306-
; RV32-NEXT: .LBB10_2: # %f
306+
; RV32-NEXT: ret
307+
; RV32-NEXT: .LBB10_2:
308+
; RV32-NEXT: slli a0, a0, 16
309+
; RV32-NEXT: srli a0, a0, 16
307310
; RV32-NEXT: ret
308311
;
309312
; RV64-LABEL: test_clear_mask_i16_i8:
@@ -312,7 +315,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind {
312315
; RV64-NEXT: bnez a1, .LBB10_2
313316
; RV64-NEXT: # %bb.1: # %t
314317
; RV64-NEXT: li a0, 42
315-
; RV64-NEXT: .LBB10_2: # %f
318+
; RV64-NEXT: ret
319+
; RV64-NEXT: .LBB10_2:
320+
; RV64-NEXT: slli a0, a0, 48
321+
; RV64-NEXT: srli a0, a0, 48
316322
; RV64-NEXT: ret
317323
entry:
318324
%a = and i16 %x, 128
@@ -332,7 +338,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind {
332338
; RV32-NEXT: beqz a1, .LBB11_2
333339
; RV32-NEXT: # %bb.1: # %t
334340
; RV32-NEXT: li a0, 42
335-
; RV32-NEXT: .LBB11_2: # %f
341+
; RV32-NEXT: ret
342+
; RV32-NEXT: .LBB11_2:
343+
; RV32-NEXT: slli a0, a0, 16
344+
; RV32-NEXT: srli a0, a0, 16
336345
; RV32-NEXT: ret
337346
;
338347
; RV64-LABEL: test_set_mask_i16_i8:
@@ -341,7 +350,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind {
341350
; RV64-NEXT: beqz a1, .LBB11_2
342351
; RV64-NEXT: # %bb.1: # %t
343352
; RV64-NEXT: li a0, 42
344-
; RV64-NEXT: .LBB11_2: # %f
353+
; RV64-NEXT: ret
354+
; RV64-NEXT: .LBB11_2:
355+
; RV64-NEXT: slli a0, a0, 48
356+
; RV64-NEXT: srli a0, a0, 48
345357
; RV64-NEXT: ret
346358
entry:
347359
%a = and i16 %x, 128
@@ -361,7 +373,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind {
361373
; RV32-NEXT: beqz a1, .LBB12_2
362374
; RV32-NEXT: # %bb.1: # %t
363375
; RV32-NEXT: li a0, 42
364-
; RV32-NEXT: .LBB12_2: # %f
376+
; RV32-NEXT: ret
377+
; RV32-NEXT: .LBB12_2:
378+
; RV32-NEXT: slli a0, a0, 16
379+
; RV32-NEXT: srli a0, a0, 16
365380
; RV32-NEXT: ret
366381
;
367382
; RV64-LABEL: test_set_mask_i16_i7:
@@ -370,7 +385,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind {
370385
; RV64-NEXT: beqz a1, .LBB12_2
371386
; RV64-NEXT: # %bb.1: # %t
372387
; RV64-NEXT: li a0, 42
373-
; RV64-NEXT: .LBB12_2: # %f
388+
; RV64-NEXT: ret
389+
; RV64-NEXT: .LBB12_2:
390+
; RV64-NEXT: slli a0, a0, 48
391+
; RV64-NEXT: srli a0, a0, 48
374392
; RV64-NEXT: ret
375393
entry:
376394
%a = and i16 %x, 64

llvm/test/CodeGen/RISCV/signed-truncation-check.ll

Lines changed: 72 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -254,23 +254,43 @@ define i1 @shifts_eqcmp_i64_i8(i64 %x) nounwind {
254254
; ---------------------------------------------------------------------------- ;
255255

256256
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
257-
; RV32-LABEL: add_ugecmp_i16_i8:
258-
; RV32: # %bb.0:
259-
; RV32-NEXT: addi a0, a0, -128
260-
; RV32-NEXT: slli a0, a0, 16
261-
; RV32-NEXT: srli a0, a0, 24
262-
; RV32-NEXT: sltiu a0, a0, 255
263-
; RV32-NEXT: xori a0, a0, 1
264-
; RV32-NEXT: ret
257+
; RV32I-LABEL: add_ugecmp_i16_i8:
258+
; RV32I: # %bb.0:
259+
; RV32I-NEXT: slli a0, a0, 16
260+
; RV32I-NEXT: srli a0, a0, 16
261+
; RV32I-NEXT: addi a0, a0, -128
262+
; RV32I-NEXT: srli a0, a0, 8
263+
; RV32I-NEXT: sltiu a0, a0, 255
264+
; RV32I-NEXT: xori a0, a0, 1
265+
; RV32I-NEXT: ret
265266
;
266-
; RV64-LABEL: add_ugecmp_i16_i8:
267-
; RV64: # %bb.0:
268-
; RV64-NEXT: addi a0, a0, -128
269-
; RV64-NEXT: slli a0, a0, 48
270-
; RV64-NEXT: srli a0, a0, 56
271-
; RV64-NEXT: sltiu a0, a0, 255
272-
; RV64-NEXT: xori a0, a0, 1
273-
; RV64-NEXT: ret
267+
; RV64I-LABEL: add_ugecmp_i16_i8:
268+
; RV64I: # %bb.0:
269+
; RV64I-NEXT: slli a0, a0, 48
270+
; RV64I-NEXT: srli a0, a0, 48
271+
; RV64I-NEXT: addi a0, a0, -128
272+
; RV64I-NEXT: srli a0, a0, 8
273+
; RV64I-NEXT: sltiu a0, a0, 255
274+
; RV64I-NEXT: xori a0, a0, 1
275+
; RV64I-NEXT: ret
276+
;
277+
; RV32ZBB-LABEL: add_ugecmp_i16_i8:
278+
; RV32ZBB: # %bb.0:
279+
; RV32ZBB-NEXT: zext.h a0, a0
280+
; RV32ZBB-NEXT: addi a0, a0, -128
281+
; RV32ZBB-NEXT: srli a0, a0, 8
282+
; RV32ZBB-NEXT: sltiu a0, a0, 255
283+
; RV32ZBB-NEXT: xori a0, a0, 1
284+
; RV32ZBB-NEXT: ret
285+
;
286+
; RV64ZBB-LABEL: add_ugecmp_i16_i8:
287+
; RV64ZBB: # %bb.0:
288+
; RV64ZBB-NEXT: zext.h a0, a0
289+
; RV64ZBB-NEXT: addi a0, a0, -128
290+
; RV64ZBB-NEXT: srli a0, a0, 8
291+
; RV64ZBB-NEXT: sltiu a0, a0, 255
292+
; RV64ZBB-NEXT: xori a0, a0, 1
293+
; RV64ZBB-NEXT: ret
274294
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
275295
%tmp1 = icmp uge i16 %tmp0, -256 ; ~0U << 8
276296
ret i1 %tmp1
@@ -471,23 +491,43 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
471491

472492
; Slightly more canonical variant
473493
define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
474-
; RV32-LABEL: add_ugtcmp_i16_i8:
475-
; RV32: # %bb.0:
476-
; RV32-NEXT: addi a0, a0, -128
477-
; RV32-NEXT: slli a0, a0, 16
478-
; RV32-NEXT: srli a0, a0, 24
479-
; RV32-NEXT: sltiu a0, a0, 255
480-
; RV32-NEXT: xori a0, a0, 1
481-
; RV32-NEXT: ret
494+
; RV32I-LABEL: add_ugtcmp_i16_i8:
495+
; RV32I: # %bb.0:
496+
; RV32I-NEXT: slli a0, a0, 16
497+
; RV32I-NEXT: srli a0, a0, 16
498+
; RV32I-NEXT: addi a0, a0, -128
499+
; RV32I-NEXT: srli a0, a0, 8
500+
; RV32I-NEXT: sltiu a0, a0, 255
501+
; RV32I-NEXT: xori a0, a0, 1
502+
; RV32I-NEXT: ret
482503
;
483-
; RV64-LABEL: add_ugtcmp_i16_i8:
484-
; RV64: # %bb.0:
485-
; RV64-NEXT: addi a0, a0, -128
486-
; RV64-NEXT: slli a0, a0, 48
487-
; RV64-NEXT: srli a0, a0, 56
488-
; RV64-NEXT: sltiu a0, a0, 255
489-
; RV64-NEXT: xori a0, a0, 1
490-
; RV64-NEXT: ret
504+
; RV64I-LABEL: add_ugtcmp_i16_i8:
505+
; RV64I: # %bb.0:
506+
; RV64I-NEXT: slli a0, a0, 48
507+
; RV64I-NEXT: srli a0, a0, 48
508+
; RV64I-NEXT: addi a0, a0, -128
509+
; RV64I-NEXT: srli a0, a0, 8
510+
; RV64I-NEXT: sltiu a0, a0, 255
511+
; RV64I-NEXT: xori a0, a0, 1
512+
; RV64I-NEXT: ret
513+
;
514+
; RV32ZBB-LABEL: add_ugtcmp_i16_i8:
515+
; RV32ZBB: # %bb.0:
516+
; RV32ZBB-NEXT: zext.h a0, a0
517+
; RV32ZBB-NEXT: addi a0, a0, -128
518+
; RV32ZBB-NEXT: srli a0, a0, 8
519+
; RV32ZBB-NEXT: sltiu a0, a0, 255
520+
; RV32ZBB-NEXT: xori a0, a0, 1
521+
; RV32ZBB-NEXT: ret
522+
;
523+
; RV64ZBB-LABEL: add_ugtcmp_i16_i8:
524+
; RV64ZBB: # %bb.0:
525+
; RV64ZBB-NEXT: zext.h a0, a0
526+
; RV64ZBB-NEXT: addi a0, a0, -128
527+
; RV64ZBB-NEXT: srli a0, a0, 8
528+
; RV64ZBB-NEXT: sltiu a0, a0, 255
529+
; RV64ZBB-NEXT: xori a0, a0, 1
530+
; RV64ZBB-NEXT: ret
491531
%tmp0 = add i16 %x, -128 ; ~0U << (8-1)
492532
%tmp1 = icmp ugt i16 %tmp0, -257 ; ~0U << 8 - 1
493533
ret i1 %tmp1

0 commit comments

Comments
 (0)