@@ -384,3 +384,47 @@ func.func @parallel_no_annotations(%arg0 : index, %arg1 : index, %arg2 : index,
384
384
385
385
// CHECK-LABEL: @parallel_no_annotations
386
386
// CHECK: scf.parallel
387
+
388
+ // -----
389
+
390
+ // CHECK-LABEL: @step_invariant
391
// Test input: a two-level scf.parallel nest that adds two 1x1 f64 buffers
// element-wise into a third one. The outer loop is annotated for block_x and
// the inner loop for thread_x.
// NOTE(review): the test name suggests the point of interest is that the inner
// loop's bounds and step are defined inside the outer loop body while not
// depending on the outer induction variable - confirm against the pass under test.
+ func.func @step_invariant () {
392
// Destination buffer (%alloc) followed by the two source buffers.
+ %alloc = memref.alloc () : memref <1 x1 xf64 >
393
+ %alloc_0 = memref.alloc () : memref <1 x1 xf64 >
394
+ %alloc_1 = memref.alloc () : memref <1 x1 xf64 >
395
// Lower bound, upper bound and step of the outer loop.
+ %c0 = arith.constant 0 : index
396
+ %c1 = arith.constant 1 : index
397
+ %c1_2 = arith.constant 1 : index
398
+ scf.parallel (%arg0 ) = (%c0 ) to (%c1 ) step (%c1_2 ) {
399
// Lower bound, upper bound and step of the inner loop; these constants are
// created inside the outer loop body rather than at function scope.
+ %c0_3 = arith.constant 0 : index
400
+ %c1_4 = arith.constant 1 : index
401
+ %c1_5 = arith.constant 1 : index
402
+ scf.parallel (%arg1 ) = (%c0_3 ) to (%c1_4 ) step (%c1_5 ) {
403
// Load both operands at [%arg0, %arg1], add them, and store the sum to %alloc.
+ %0 = memref.load %alloc_1 [%arg0 , %arg1 ] : memref <1 x1 xf64 >
404
+ %1 = memref.load %alloc_0 [%arg0 , %arg1 ] : memref <1 x1 xf64 >
405
+ %2 = arith.addf %0 , %1 : f64
406
+ memref.store %2 , %alloc [%arg0 , %arg1 ] : memref <1 x1 xf64 >
407
+ scf.yield
408
// Inner loop mapping: processor thread_x with identity map and bound.
+ } {mapping = [#gpu.loop_dim_map <processor = thread_x , map = (d0 ) -> (d0 ), bound = (d0 ) -> (d0 )>]}
409
+ scf.yield
410
// Outer loop mapping: processor block_x with identity map and bound.
+ } {mapping = [#gpu.loop_dim_map <processor = block_x , map = (d0 ) -> (d0 ), bound = (d0 ) -> (d0 )>]}
411
// Release the buffers in reverse order of allocation.
+ memref.dealloc %alloc_1 : memref <1 x1 xf64 >
412
+ memref.dealloc %alloc_0 : memref <1 x1 xf64 >
413
+ memref.dealloc %alloc : memref <1 x1 xf64 >
414
+ return
415
+ }
416
+
417
+ // CHECK: %[[alloc_0:.*]] = memref.alloc() : memref<1x1xf64>
418
+ // CHECK: %[[alloc_1:.*]] = memref.alloc() : memref<1x1xf64>
419
+ // CHECK: %[[alloc_2:.*]] = memref.alloc() : memref<1x1xf64>
420
+ // CHECK: %[[map_0:.*]] = affine.apply #map({{.*}})[{{.*}}, {{.*}}]
421
+ // CHECK: %[[map_1:.*]] = affine.apply #map({{.*}})[{{.*}}, {{.*}}]
422
+ // CHECK: gpu.launch
423
+ // CHECK-SAME: blocks(%[[arg_0:.*]], %{{[^)]*}}, %{{[^)]*}}) in (%{{[^)]*}} = %[[map_0]], %{{[^)]*}} = %{{[^)]*}}, %{{[^)]*}} = %{{[^)]*}})
424
+ // CHECK-SAME: threads(%[[arg_3:.*]], %{{[^)]*}}, %{{[^)]*}}) in (%{{[^)]*}} = %[[map_1]], %{{[^)]*}} = %{{[^)]*}}, %{{[^)]*}} = %{{[^)]*}})
425
+ // CHECK: %[[dim0:.*]] = affine.apply #map1(%[[arg_0]])[{{.*}}, {{.*}}]
426
+ // CHECK: %[[dim1:.*]] = affine.apply #map1(%[[arg_3]])[{{.*}}, {{.*}}]
427
+ // CHECK: %[[lhs:.*]] = memref.load %[[alloc_2]][%[[dim0]], %[[dim1]]] : memref<1x1xf64>
428
+ // CHECK: %[[rhs:.*]] = memref.load %[[alloc_1]][%[[dim0]], %[[dim1]]] : memref<1x1xf64>
429
+ // CHECK: %[[sum:.*]] = arith.addf %[[lhs]], %[[rhs]] : f64
430
+ // CHECK: memref.store %[[sum]], %[[alloc_0]][%[[dim0]], %[[dim1]]] : memref<1x1xf64>
0 commit comments