|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
2 |
| -; RUN: opt -passes=instcombine -S < %s | FileCheck %s |
| 2 | +; RUN: opt -passes=instsimplify -S < %s | FileCheck %s |
3 | 3 |
|
4 | 4 | ;; The and X, (add Y, -1) pattern is from an earlier instcombine pass which
|
5 | 5 | ;; converted
|
|
29 | 29 | ;; calls were the same Value*, but since there's two of them it doesn't
|
30 | 30 | ;; work and we convert the urem to add/and. CSE then gets rid of the extra
|
31 | 31 | ;; vscale, leaving us with a new pattern to match. This only works because
|
32 |
| -;; vscale is known to be a nonzero power of 2 (assuming there's a defined |
33 |
| -;; range for it). |
| 32 | +;; vscale is known to be a power of 2 (assuming there's a defined range for it). |
34 | 33 |
|
35 | 34 | define i64 @f1() #0 {
|
36 | 35 | ; CHECK-LABEL: define i64 @f1
|
37 | 36 | ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
38 | 37 | ; CHECK-NEXT: entry:
|
39 | 38 | ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
|
40 |
| -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 |
41 |
| -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3 |
42 |
| -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], -1 |
| 39 | +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4 |
| 40 | +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 3 |
| 41 | +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1 |
43 | 42 | ; CHECK-NEXT: [[REM:%.*]] = and i64 [[TMP1]], [[TMP3]]
|
44 | 43 | ; CHECK-NEXT: ret i64 [[REM]]
|
45 | 44 | ;
|
46 | 45 | entry:
|
47 | 46 | %0 = call i64 @llvm.vscale.i64()
|
48 |
| - %1 = shl nuw nsw i64 %0, 4 |
49 |
| - %2 = shl nuw nsw i64 %0, 3 |
50 |
| - %3 = add nsw i64 %2, -1 |
| 47 | + %1 = shl i64 %0, 4 |
| 48 | + %2 = shl i64 %0, 3 |
| 49 | + %3 = add i64 %2, -1 |
| 50 | + %rem = and i64 %1, %3 |
| 51 | + ret i64 %rem |
| 52 | +} |
| 53 | + |
| 54 | +;; Make sure it works if the value could also be zero: X = ARG & -ARG keeps only the lowest set bit of ARG, so X is either a power of 2 or zero. |
| 55 | +define i64 @test_pow2_or_zero(i64 %arg) { |
| 56 | +; CHECK-LABEL: define i64 @test_pow2_or_zero |
| 57 | +; CHECK-SAME: (i64 [[ARG:%.*]]) { |
| 58 | +; CHECK-NEXT: [[NEG:%.*]] = sub i64 0, [[ARG]] |
| 59 | +; CHECK-NEXT: [[X:%.*]] = and i64 [[NEG]], [[ARG]] |
| 60 | +; CHECK-NEXT: [[SHL1:%.*]] = shl i64 [[X]], 4 |
| 61 | +; CHECK-NEXT: [[SHL2:%.*]] = shl i64 [[X]], 3 |
| 62 | +; CHECK-NEXT: [[MASK:%.*]] = add i64 [[SHL2]], -1 |
| 63 | +; CHECK-NEXT: [[REM:%.*]] = and i64 [[SHL1]], [[MASK]] |
| 64 | +; CHECK-NEXT: ret i64 [[REM]] |
| 65 | +; |
| 66 | + %neg = sub i64 0, %arg |
| 67 | + %x = and i64 %neg, %arg |
| 68 | + %shl1 = shl i64 %x, 4 |
| 69 | + %shl2 = shl i64 %x, 3 |
| 70 | + %mask = add i64 %shl2, -1 |
| 71 | + %rem = and i64 %shl1, %mask |
| 72 | + ret i64 %rem |
| 73 | +} |
| 74 | + |
| 75 | +;; Make sure it doesn't work if the value isn't known to be a power of 2: unlike @f1, this function carries no #0 attribute group, so (presumably) vscale has no defined range here. |
| 76 | +define i64 @no_pow2() { |
| 77 | +; CHECK-LABEL: define i64 @no_pow2() { |
| 78 | +; CHECK-NEXT: entry: |
| 79 | +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() |
| 80 | +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4 |
| 81 | +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 3 |
| 82 | +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1 |
| 83 | +; CHECK-NEXT: [[REM:%.*]] = and i64 [[TMP1]], [[TMP3]] |
| 84 | +; CHECK-NEXT: ret i64 [[REM]] |
| 85 | +; |
| 86 | +entry: |
| 87 | + %0 = call i64 @llvm.vscale.i64() |
| 88 | + %1 = shl i64 %0, 4 |
| 89 | + %2 = shl i64 %0, 3 |
| 90 | + %3 = add i64 %2, -1 |
51 | 91 | %rem = and i64 %1, %3
|
52 | 92 | ret i64 %rem
|
53 | 93 | }
|
|
0 commit comments