|
| 1 | +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ |
| 2 | +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc |
| 3 | +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s |
| 4 | +// expected-no-diagnostics |
| 5 | + |
| 6 | +void compute_reduced_sum(int n, int &x) { |
| 7 | + #pragma omp target teams distribute parallel for reduction(+ : x) |
| 8 | + for (int i = 0; i < n; ++i) |
| 9 | + x += i; |
| 10 | + } |
| 11 | + |
| 12 | + int main() |
| 13 | + { |
| 14 | + int n = 1000; |
| 15 | + int sum = 0; |
| 16 | + compute_reduced_sum(n, sum); |
| 17 | + } |
| 18 | + |
| 19 | +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19compute_reduced_sumiRi_l7 |
| 20 | +// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { |
| 21 | +// CHECK-NEXT: entry: |
| 22 | +// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| 23 | +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) |
| 24 | +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| 25 | +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| 26 | +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8, addrspace(5) |
| 27 | +// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5) |
| 28 | +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) |
| 29 | +// CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) |
| 30 | +// CHECK-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5) |
| 31 | +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) |
| 32 | +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) |
| 33 | +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) |
| 34 | +// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr |
| 35 | +// CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr |
| 36 | +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr |
| 37 | +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr |
| 38 | +// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr |
| 39 | +// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr |
| 40 | +// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr |
| 41 | +// CHECK-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr |
| 42 | +// CHECK-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr |
| 43 | +// CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr |
| 44 | +// CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr |
| 45 | +// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr |
| 46 | +// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 |
| 47 | +// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 |
| 48 | +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8 |
| 49 | +// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 |
| 50 | +// CHECK-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 8 |
| 51 | +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 |
| 52 | +// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP_ASCAST]], align 8 |
| 53 | +// CHECK-NEXT: [[TMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| 54 | +// CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP3]], align 4 |
| 55 | +// CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 |
| 56 | +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 |
| 57 | +// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 |
| 58 | +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 |
| 59 | +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 |
| 60 | +// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 |
| 61 | +// CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 |
| 62 | +// CHECK-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4 |
| 63 | +// CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 |
| 64 | +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 |
| 65 | +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4 |
| 66 | +// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4 |
| 67 | +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 |
| 68 | +// CHECK-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4 |
| 69 | +// CHECK-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() |
| 70 | +// CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() |
| 71 | +// CHECK-NEXT: [[GPU_BLOCK_ID:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() |
| 72 | +// CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[GPU_BLOCK_ID]], [[NVPTX_NUM_THREADS]] |
| 73 | +// CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP8]] |
| 74 | +// CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 1 |
| 75 | +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 |
| 76 | +// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], [[TMP12]] |
| 77 | +// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_hardware_num_blocks() |
| 78 | +// CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP13]] to i64 |
| 79 | +// CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4 |
| 80 | +// CHECK-NEXT: br label [[FOR_COND:%.*]] |
| 81 | +// CHECK: for.cond: |
| 82 | +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 |
| 83 | +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 |
| 84 | +// CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] |
| 85 | +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] |
| 86 | +// CHECK: for.body: |
| 87 | +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 |
| 88 | +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 |
| 89 | +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] |
| 90 | +// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4 |
| 91 | +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[I_ASCAST]], align 4 |
| 92 | +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4 |
| 93 | +// CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], [[TMP19]] |
| 94 | +// CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(5) [[TMP3]], align 4 |
| 95 | +// CHECK-NEXT: br label [[FOR_INC:%.*]] |
| 96 | +// CHECK: for.inc: |
| 97 | +// CHECK-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() |
| 98 | +// CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[NVPTX_NUM_THREADS4]], [[TMP14]] |
| 99 | +// CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 1 |
| 100 | +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 |
| 101 | +// CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP23]], [[TMP24]] |
| 102 | +// CHECK-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV_ASCAST]], align 4 |
| 103 | +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] |
| 104 | +// CHECK: for.end: |
| 105 | +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 |
| 106 | +// CHECK-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTADDR1_ASCAST]], align 8 |
| 107 | +// CHECK-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8 |
| 108 | +// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(5) [[TMP3]], align 4 |
| 109 | +// CHECK-NEXT: call void @__kmpc_xteamr_i_16x64(i32 [[TMP29]], ptr [[TMP28]], ptr [[TMP26]], ptr [[TMP27]], ptr @__kmpc_rfun_sum_i, ptr @__kmpc_rfun_sum_lds_i, i32 0, i64 [[TMP15]], i32 [[TMP14]]) |
| 110 | +// CHECK-NEXT: ret void |
| 111 | +// |
0 commit comments