@@ -421,11 +421,12 @@ define i32 @shl_lshr_constants(i32 %x) {
ret i32 %r
}
+ ; Pre-shift a constant to eliminate lshr.
+
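+ ; Illustrative arithmetic: 40 == 0b0010_1000 == 5 << 3, so (40 << x) >> 3 keeps the
+ ; same low bits as 5 << x, and the high bits that could differ are overwritten by
+ ; the 'or ..., -32' below, so they are not demanded.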
define i8 @shl_lshr_demand1(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand1(
- ; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
- ; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i8 [[SHL]], 3
- ; CHECK-NEXT: [[R:%.*]] = or i8 [[LSHR]], -32
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i8 5, [[X:%.*]]
+ ; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -32
; CHECK-NEXT: ret i8 [[R]]
;
%shl = shl i8 40, %x ; 0b0010_1000
@@ -434,11 +435,13 @@ define i8 @shl_lshr_demand1(i8 %x) {
ret i8 %r
}
+ ; Pre-shift a constant to eliminate disguised lshr.
+
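+ ; Note: the old checks show the ashr already canonicalized to 'lshr exact', and the
+ ; 'or ..., -32' masks the bits an ashr would fill differently, so the same 40 >> 3 == 5
+ ; pre-shift applies; the original shl survives because @use8 keeps it alive.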
define i8 @shl_ashr_demand2(i8 %x) {
; CHECK-LABEL: @shl_ashr_demand2(
; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
; CHECK-NEXT: call void @use8(i8 [[SHL]])
- ; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i8 [[SHL]], 3
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i8 5, [[X]]
; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -32
; CHECK-NEXT: ret i8 [[R]]
;
@@ -449,6 +452,8 @@ define i8 @shl_ashr_demand2(i8 %x) {
ret i8 %r
}
+ ; It is not safe to pre-shift because we demand an extra high bit.
+
define i8 @shl_lshr_demand3(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand3(
; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
@@ -462,6 +467,8 @@ define i8 @shl_lshr_demand3(i8 %x) {
ret i8 %r
}
+ ; It is not valid to pre-shift because we lose the low bit of 44.
+
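+ ; 44 == 0b0010_1100 and 44 >> 3 == 5, but 5 << 3 == 40 != 44: the dropped low bit
+ ; changes the result (e.g. for x == 1, (44 << 1) >> 3 == 11 while 5 << 1 == 10).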
define i8 @shl_lshr_demand4(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand4(
; CHECK-NEXT: [[SHL:%.*]] = shl i8 44, [[X:%.*]]
@@ -475,11 +482,12 @@ define i8 @shl_lshr_demand4(i8 %x) {
ret i8 %r
}
+ ; Splat vectors work too, and we don't care what instruction reduces demand for high bits.
+
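+ ; -108 is the bit pattern 0b1001_0100 (148), 148 >> 2 == 37 with no low bits lost,
+ ; and the trunc to i6 drops the 2 high bits, so the splat constant can be pre-shifted.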
define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5(
- ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
- ; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <2 x i8> [[SHL]], <i8 2, i8 2>
- ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 37>, [[X:%.*]]
+ ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
; CHECK-NEXT: ret <2 x i6> [[R]]
;
%shl = shl <2 x i8> <i8 148, i8 148>, %x ; 0b1001_0100
@@ -488,6 +496,8 @@ define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; TODO: allow undef/poison elements for this transform.
+
define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_undef_left(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 undef, i8 -108>, [[X:%.*]]
@@ -501,6 +511,8 @@ define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; TODO: allow undef/poison elements for this transform.
+
define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_undef_right(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -514,6 +526,8 @@ define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; TODO: allow non-splat vector constants.
+
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -527,11 +541,12 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; A non-splat shl constant is ok.
+
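+ ; Both lanes have their low 2 bits clear, so 148 >> 2 == 37 and 144 >> 2 == 36 lose
+ ; nothing and the transform applies lane-wise despite the non-splat shl constant.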
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_right(
- ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -112>, [[X:%.*]]
- ; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 2, i8 2>
- ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 36>, [[X:%.*]]
+ ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
; CHECK-NEXT: ret <2 x i6> [[R]]
;
%shl = shl <2 x i8> <i8 148, i8 144>, %x ; 0b1001_0100, 0b1001_0000
@@ -540,6 +555,8 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; This is possible, but may require significant changes to the demanded bits framework.
+
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_both(
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -104, i8 -108>, [[X:%.*]]
@@ -553,11 +570,12 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
ret <2 x i6> %r
}
+ ; 'and' can reduce demand for high bits too.
+
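+ ; 32912 == 0b1000_0000_1001_0000 and 32912 >> 4 == 2057; the mask 4094
+ ; (0b0000_1111_1111_1110) does not demand the top 4 bits, so the lshr folds into
+ ; the shl constant here as well.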
define i16 @shl_lshr_demand6(i16 %x) {
; CHECK-LABEL: @shl_lshr_demand6(
- ; CHECK-NEXT: [[SHL:%.*]] = shl i16 -32624, [[X:%.*]]
- ; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i16 [[SHL]], 4
- ; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 4094
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2057, [[X:%.*]]
+ ; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 4094
; CHECK-NEXT: ret i16 [[R]]
;
%shl = shl i16 32912, %x ; 0b1000_0000_1001_0000