|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 |
1 | 2 | ; RUN: opt -S -mtriple=amdgcn-- -passes=separate-const-offset-from-gep,slsr,gvn < %s | FileCheck %s
|
2 |
| -; RUN: opt -S -mtriple=amdgcn-- -passes="separate-const-offset-from-gep,slsr,gvn" < %s | FileCheck %s |
3 | 3 |
|
4 | 4 | target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
|
5 | 5 |
|
6 | 6 |
|
7 |
| -; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset( |
8 |
| -; CHECK: [[b1:%[0-9]+]] = getelementptr float, ptr addrspace(1) %arr, i64 [[bump:%[0-9]+]] |
9 |
| -; CHECK: [[b2:%[0-9]+]] = getelementptr float, ptr addrspace(1) [[b1]], i64 [[bump]] |
10 | 7 | define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(ptr addrspace(1) %out, ptr addrspace(1) noalias %arr, i32 %i) {
|
| 8 | +; CHECK-LABEL: define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset( |
| 9 | +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) noalias [[ARR:%.*]], i32 [[I:%.*]]) { |
| 10 | +; CHECK-NEXT: bb: |
| 11 | +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 |
| 12 | +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[ARR]], i64 [[TMP0]] |
| 13 | +; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP1]], i64 1023 |
| 14 | +; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(1) [[P12]], align 4 |
| 15 | +; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4 |
| 16 | +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i64 [[TMP0]] |
| 17 | +; CHECK-NEXT: [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[TMP2]], i64 1023 |
| 18 | +; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(1) [[P24]], align 4 |
| 19 | +; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4 |
| 20 | +; CHECK-NEXT: ret void |
| 21 | +; |
11 | 22 | bb:
|
12 | 23 | %i2 = shl nsw i32 %i, 1
|
13 | 24 | %j1 = add nsw i32 %i, 1023
|
|
25 | 36 | ret void
|
26 | 37 | }
|
27 | 38 |
|
28 |
| -; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset( |
29 |
| -; CHECK: %j1 = add nsw i32 %i, 1024 |
30 |
| -; CHECK: %tmp = sext i32 %j1 to i64 |
31 |
| -; CHECK: getelementptr inbounds float, ptr addrspace(1) %arr, i64 %tmp |
32 |
| -; CHECK: getelementptr inbounds float, ptr addrspace(1) %arr, i64 %tmp5 |
33 | 39 | define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(ptr addrspace(1) %out, ptr addrspace(1) noalias %arr, i32 %i) {
|
| 40 | +; CHECK-LABEL: define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset( |
| 41 | +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) noalias [[ARR:%.*]], i32 [[I:%.*]]) { |
| 42 | +; CHECK-NEXT: bb: |
| 43 | +; CHECK-NEXT: [[J1:%.*]] = add nsw i32 [[I]], 1024 |
| 44 | +; CHECK-NEXT: [[TMP:%.*]] = sext i32 [[J1]] to i64 |
| 45 | +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARR]], i64 [[TMP]] |
| 46 | +; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(1) [[P1]], align 4 |
| 47 | +; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4 |
| 48 | +; CHECK-NEXT: [[J2:%.*]] = add i32 [[J1]], [[I]] |
| 49 | +; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[J2]] to i64 |
| 50 | +; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[ARR]], i64 [[TMP5]] |
| 51 | +; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(1) [[P2]], align 4 |
| 52 | +; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4 |
| 53 | +; CHECK-NEXT: ret void |
| 54 | +; |
34 | 55 | bb:
|
35 | 56 | %i2 = shl nsw i32 %i, 1
|
36 | 57 | %j1 = add nsw i32 %i, 1024
|
|
48 | 69 | ret void
|
49 | 70 | }
|
50 | 71 |
|
51 |
| -; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset( |
52 |
| -; CHECK: [[B1:%[0-9]+]] = getelementptr float, ptr addrspace(3) %arr, i32 %i |
53 |
| -; CHECK: getelementptr inbounds float, ptr addrspace(3) [[B1]], i32 16383 |
54 | 72 |
|
55 |
| -; CHECK: [[B2:%[0-9]+]] = getelementptr float, ptr addrspace(3) [[B1]], i32 %i |
56 |
| -; CHECK: getelementptr inbounds float, ptr addrspace(3) [[B2]], i32 16383 |
57 | 73 | define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(ptr addrspace(1) %out, ptr addrspace(3) noalias %arr, i32 %i) {
|
| 74 | +; CHECK-LABEL: define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset( |
| 75 | +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(3) noalias [[ARR:%.*]], i32 [[I:%.*]]) { |
| 76 | +; CHECK-NEXT: bb: |
| 77 | +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr addrspace(3) [[ARR]], i32 [[I]] |
| 78 | +; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP0]], i32 16383 |
| 79 | +; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(3) [[P12]], align 4 |
| 80 | +; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4 |
| 81 | +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(3) [[TMP0]], i32 [[I]] |
| 82 | +; CHECK-NEXT: [[P24:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[TMP1]], i32 16383 |
| 83 | +; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(3) [[P24]], align 4 |
| 84 | +; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4 |
| 85 | +; CHECK-NEXT: ret void |
| 86 | +; |
58 | 87 | bb:
|
59 | 88 | %i2 = shl nsw i32 %i, 1
|
60 | 89 | %j1 = add nsw i32 %i, 16383
|
|
70 | 99 | ret void
|
71 | 100 | }
|
72 | 101 |
|
73 |
| -; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset( |
74 |
| -; CHECK: %j1 = add nsw i32 %i, 16384 |
75 |
| -; CHECK: getelementptr inbounds float, ptr addrspace(3) %arr, i32 %j1 |
76 |
| -; CHECK: %j2 = add i32 %j1, %i |
77 |
| -; CHECK: getelementptr inbounds float, ptr addrspace(3) %arr, i32 %j2 |
78 | 102 | define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset(ptr addrspace(1) %out, ptr addrspace(3) noalias %arr, i32 %i) {
|
| 103 | +; CHECK-LABEL: define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset( |
| 104 | +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(3) noalias [[ARR:%.*]], i32 [[I:%.*]]) { |
| 105 | +; CHECK-NEXT: bb: |
| 106 | +; CHECK-NEXT: [[J1:%.*]] = add nsw i32 [[I]], 16384 |
| 107 | +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[ARR]], i32 [[J1]] |
| 108 | +; CHECK-NEXT: [[V11:%.*]] = load i32, ptr addrspace(3) [[P1]], align 4 |
| 109 | +; CHECK-NEXT: store i32 [[V11]], ptr addrspace(1) [[OUT]], align 4 |
| 110 | +; CHECK-NEXT: [[J2:%.*]] = add i32 [[J1]], [[I]] |
| 111 | +; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, ptr addrspace(3) [[ARR]], i32 [[J2]] |
| 112 | +; CHECK-NEXT: [[V22:%.*]] = load i32, ptr addrspace(3) [[P2]], align 4 |
| 113 | +; CHECK-NEXT: store i32 [[V22]], ptr addrspace(1) [[OUT]], align 4 |
| 114 | +; CHECK-NEXT: ret void |
| 115 | +; |
79 | 116 | bb:
|
80 | 117 | %i2 = shl nsw i32 %i, 1
|
81 | 118 | %j1 = add nsw i32 %i, 16384
|
|
0 commit comments