|
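| 1 | +// Test the lower-nontemporal pass: fir.load/fir.store ops on data named in an omp.simd nontemporal clause get the {nontemporal} attribute. |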
| 2 | +// RUN: fir-opt --lower-nontemporal %s | FileCheck %s |
| 3 | + |
| 4 | +// CHECK-LABEL: func.func @_QPsimd_with_nontemporal_clause |
| 5 | +func.func @_QPsimd_with_nontemporal_clause(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}) { |
| 6 | + %c1_i32 = arith.constant 1 : i32 |
| 7 | + %0 = fir.dummy_scope : !fir.dscope |
| 8 | + %1 = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFsimd_with_nontemporal_clauseEa"} |
| 9 | + // CHECK: %[[A_DECL:.*]] = fir.declare %{{.*}} {uniq_name = "_QFsimd_with_nontemporal_clauseEa"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 10 | + // CHECK: %[[C_DECL:.*]] = fir.declare %{{.*}} {uniq_name = "_QFsimd_with_nontemporal_clauseEc"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 11 | + %2 = fir.declare %1 {uniq_name = "_QFsimd_with_nontemporal_clauseEa"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 12 | + %3 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFsimd_with_nontemporal_clauseEb"} |
| 13 | + %4 = fir.declare %3 {uniq_name = "_QFsimd_with_nontemporal_clauseEb"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 14 | + %5 = fir.alloca i32 {bindc_name = "c", uniq_name = "_QFsimd_with_nontemporal_clauseEc"} |
| 15 | + %6 = fir.declare %5 {uniq_name = "_QFsimd_with_nontemporal_clauseEc"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 16 | + %7 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimd_with_nontemporal_clauseEi"} |
| 17 | + %8 = fir.declare %7 {uniq_name = "_QFsimd_with_nontemporal_clauseEi"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 18 | + %9 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFsimd_with_nontemporal_clauseEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32> |
| 19 | + %10 = fir.load %9 : !fir.ref<i32> |
| 20 | + // CHECK: omp.simd nontemporal(%[[A_DECL]], %[[C_DECL]] : !fir.ref<i32>, !fir.ref<i32>) private(@_QFsimd_with_nontemporal_clauseEi_private_i32 %{{.*}} -> %{{.*}} : !fir.ref<i32>) { |
| 21 | + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { |
| 22 | + omp.simd nontemporal(%2, %6 : !fir.ref<i32>, !fir.ref<i32>) private(@_QFsimd_with_nontemporal_clauseEi_private_i32 %8 -> %arg1 : !fir.ref<i32>) { |
| 23 | + omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%10) inclusive step (%c1_i32) { |
| 24 | + %11 = fir.declare %arg1 {uniq_name = "_QFsimd_with_nontemporal_clauseEi"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 25 | + fir.store %arg2 to %11 : !fir.ref<i32> |
| 26 | + // CHECK: %[[LOAD:.*]] = fir.load %[[A_DECL]] {nontemporal} : !fir.ref<i32> |
| 27 | + %12 = fir.load %2 : !fir.ref<i32> |
| 28 | + %13 = fir.load %4 : !fir.ref<i32> |
| 29 | + %14 = arith.addi %12, %13 : i32 |
| 30 | + // CHECK: %[[ADD_VAL:.*]] = arith.addi %[[LOAD]], %{{.*}} : i32 |
| 31 | + // CHECK: fir.store %[[ADD_VAL]] to %[[C_DECL]] {nontemporal} : !fir.ref<i32> |
| 32 | + fir.store %14 to %6 : !fir.ref<i32> |
| 33 | + omp.yield |
| 34 | + } |
| 35 | + } |
| 36 | + return |
| 37 | + } |
| 38 | + |
| 39 | +// CHECK-LABEL: func.func @_QPsimd_nontemporal_allocatable |
| 40 | +func.func @_QPsimd_nontemporal_allocatable(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "y"}) { |
| 41 | + %c1_i32 = arith.constant 1 : i32 |
| 42 | + %c0 = arith.constant 0 : index |
| 43 | + %c100_i32 = arith.constant 100 : i32 |
| 44 | + %0 = fir.dummy_scope : !fir.dscope |
| 45 | + %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimd_nontemporal_allocatableEi"} |
| 46 | + %2 = fir.declare %1 {uniq_name = "_QFsimd_nontemporal_allocatableEi"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 47 | + // CHECK: %[[X_DECL:.*]] = fir.declare %{{.*}} dummy_scope %{{.*}} {fortran_attrs = #fir.var_attrs<allocatable>, |
| 48 | + // CHECK-SAME: uniq_name = "_QFsimd_nontemporal_allocatableEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> |
| 49 | + %3 = fir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsimd_nontemporal_allocatableEx"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.dscope) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> |
| 50 | + %4 = fir.declare %arg1 dummy_scope %0 {uniq_name = "_QFsimd_nontemporal_allocatableEy"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32> |
| 51 | + %5 = fir.convert %c100_i32 : (i32) -> index |
| 52 | + %6 = arith.cmpi sgt, %5, %c0 : index |
| 53 | + %7 = arith.select %6, %5, %c0 : index |
| 54 | + %8 = fir.allocmem !fir.array<?xi32>, %7 {fir.must_be_heap = true, uniq_name = "_QFsimd_nontemporal_allocatableEx.alloc"} |
| 55 | + %9 = fir.shape %7 : (index) -> !fir.shape<1> |
| 56 | + %10 = fir.embox %8(%9) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>> |
| 57 | + fir.store %10 to %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> |
| 58 | + // CHECK: omp.simd nontemporal(%[[X_DECL]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) private(@_QFsimd_nontemporal_allocatableEi_private_i32 %{{.*}} -> %{{.*}} : !fir.ref<i32>) { |
| 59 | + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { |
| 60 | + omp.simd nontemporal(%3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) private(@_QFsimd_nontemporal_allocatableEi_private_i32 %2 -> %arg2 : !fir.ref<i32>) { |
| 61 | + omp.loop_nest (%arg3) : i32 = (%c1_i32) to (%c100_i32) inclusive step (%c1_i32) { |
| 62 | + %16 = fir.declare %arg2 {uniq_name = "_QFsimd_nontemporal_allocatableEi"} : (!fir.ref<i32>) -> !fir.ref<i32> |
| 63 | + fir.store %arg3 to %16 : !fir.ref<i32> |
| 64 | + // CHECK: %[[VAL1:.*]] = fir.load %[[X_DECL]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> |
| 65 | + %17 = fir.load %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> |
| 66 | + %18 = fir.load %16 : !fir.ref<i32> |
| 67 | + %19 = fir.convert %18 : (i32) -> i64 |
| 68 | + // CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[VAL1]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> |
| 69 | + %20 = fir.box_addr %17 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> |
| 70 | + %c0_0 = arith.constant 0 : index |
| 71 | + %21:3 = fir.box_dims %17, %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index) |
| 72 | + %22 = fir.shape_shift %21#0, %21#1 : (index, index) -> !fir.shapeshift<1> |
| 73 | + // CHECK: %[[ARR_COOR:.*]] = fir.array_coor %[[BOX_ADDR]](%{{.*}}) %{{.*}} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>, i64) -> !fir.ref<i32> |
| 74 | + %23 = fir.array_coor %20(%22) %19 : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>, i64) -> !fir.ref<i32> |
| 75 | + // CHECK: %[[VAL2:.*]] = fir.load %[[ARR_COOR]] {nontemporal} : !fir.ref<i32> |
| 76 | + %24 = fir.load %23 : !fir.ref<i32> |
| 77 | + %25 = fir.load %4 : !fir.ref<i32> |
| 78 | + %26 = arith.addi %24, %25 : i32 |
| 79 | + %27 = fir.load %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> |
| 80 | + %28 = fir.load %16 : !fir.ref<i32> |
| 81 | + %29 = fir.convert %28 : (i32) -> i64 |
| 82 | + %30 = fir.box_addr %27 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> |
| 83 | + %c0_1 = arith.constant 0 : index |
| 84 | + %31:3 = fir.box_dims %27, %c0_1 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index) |
| 85 | + %32 = fir.shape_shift %31#0, %31#1 : (index, index) -> !fir.shapeshift<1> |
| 86 | + %33 = fir.array_coor %30(%32) %29 : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>, i64) -> !fir.ref<i32> |
| 87 | + // CHECK: fir.store %{{.*}} to %{{.*}} {nontemporal} : !fir.ref<i32> |
| 88 | + fir.store %26 to %33 : !fir.ref<i32> |
| 89 | + omp.yield |
| 90 | + } |
| 91 | + } |
| 92 | + %11 = fir.load %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> |
| 93 | + %12 = fir.box_addr %11 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> |
| 94 | + fir.freemem %12 : !fir.heap<!fir.array<?xi32>> |
| 95 | + %13 = fir.zero_bits !fir.heap<!fir.array<?xi32>> |
| 96 | + %14 = fir.shape %c0 : (index) -> !fir.shape<1> |
| 97 | + %15 = fir.embox %13(%14) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>> |
| 98 | + fir.store %15 to %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> |
| 99 | + return |
| 100 | + } |
| 101 | + |
0 commit comments