|
35 | 35 | ret void
|
36 | 36 | }
|
37 | 37 |
|
38 |
| -; TODO: Should be able to promote this |
39 | 38 | define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) {
|
40 | 39 | ; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(
|
41 | 40 | ; CHECK-SAME: i1 [[COND:%.*]]) {
|
42 | 41 | ; CHECK-NEXT: [[BB:.*:]]
|
43 |
| -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [4 x i32], align 4, addrspace(5) |
44 |
| -; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3> |
45 |
| -; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 3, i64 2, i64 1, i64 0> |
46 |
| -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(5)> [[GETELEMENTPTR0]], <4 x ptr addrspace(5)> [[GETELEMENTPTR1]] |
47 |
| -; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[SELECT]], i64 1 |
48 |
| -; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4 |
| 42 | +; CHECK-NEXT: [[TMP0:%.*]] = call noalias nonnull dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| 43 | +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 1 |
| 44 | +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4, !invariant.load [[META0]] |
| 45 | +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 2 |
| 46 | +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP3]], align 4, !range [[RNG1]], !invariant.load [[META0]] |
| 47 | +; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP2]], 16 |
| 48 | +; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() |
| 49 | +; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y() |
| 50 | +; CHECK-NEXT: [[TMP8:%.*]] = call range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.z() |
| 51 | +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP4]] |
| 52 | +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP6]] |
| 53 | +; CHECK-NEXT: [[TMP11:%.*]] = mul nuw nsw i32 [[TMP7]], [[TMP4]] |
| 54 | +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP11]] |
| 55 | +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP8]] |
| 56 | +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1024 x [4 x i32]], ptr addrspace(3) @scalar_alloca_ptr_with_vector_gep_offset_select.alloca, i32 0, i32 [[TMP13]] |
| 57 | +; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 0, i64 1, i64 2, i64 3> |
| 58 | +; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP14]], <4 x i64> <i64 3, i64 2, i64 1, i64 0> |
| 59 | +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(3)> [[GETELEMENTPTR0]], <4 x ptr addrspace(3)> [[GETELEMENTPTR1]] |
| 60 | +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(3)> [[SELECT]], i64 1 |
| 61 | +; CHECK-NEXT: store i32 0, ptr addrspace(3) [[EXTRACTELEMENT]], align 4 |
49 | 62 | ; CHECK-NEXT: ret void
|
50 | 63 | ;
|
51 | 64 | bb:
|
|
0 commit comments