Skip to content

Commit f50f969

Browse files
authored
[MLIR][GPU] Fix gpu.printf (#121940)
1 parent f0d5104 commit f50f969

16 files changed, with 41 additions and 28 deletions.

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1055,7 +1055,7 @@ def GPU_PrintfOp : GPU_Op<"printf", [MemoryEffects<[MemWrite]>]>,
10551055
imposed by one's target platform.
10561056
}];
10571057
let assemblyFormat = [{
1058-
$format attr-dict ($args^ `:` type($args))?
1058+
$format attr-dict (`,` $args^ `:` type($args))?
10591059
}];
10601060
}
10611061

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -633,7 +633,7 @@ gpu.module @test_module_29 {
633633
// CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, f64)>
634634
// CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr
635635
// CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32
636-
gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32
636+
gpu.printf "Hello: %d\n", %arg0, %arg1 : i32, f32
637637
gpu.return
638638
}
639639
}

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ gpu.module @test_module {
3636
// CHECK-NEXT: %[[NARGS1:.*]] = llvm.mlir.constant(1 : i32) : i32
3737
// CHECK-NEXT: %[[ARG0_64:.*]] = llvm.zext %[[ARG0]] : i32 to i64
3838
// CHECK-NEXT: %{{.*}} = llvm.call @__ockl_printf_append_args(%[[DESC1]], %[[NARGS1]], %[[ARG0_64]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[ISLAST]]) : (i64, i32, i64, i64, i64, i64, i64, i64, i64, i32) -> i64
39-
gpu.printf "Hello: %d\n" %arg0 : i32
39+
gpu.printf "Hello: %d\n", %arg0 : i32
4040
gpu.return
4141
}
4242
}

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ gpu.module @test_module {
99
// CHECK: %[[IMM0:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL]] : !llvm.ptr<4>
1010
// CHECK-NEXT: %[[IMM2:.*]] = llvm.getelementptr %[[IMM0]][0, 0] : (!llvm.ptr<4>) -> !llvm.ptr<4>, !llvm.array<11 x i8>
1111
// CHECK-NEXT: %{{.*}} = llvm.call @printf(%[[IMM2]], %[[ARG0]]) vararg(!llvm.func<i32 (ptr<4>, ...)>) : (!llvm.ptr<4>, i32) -> i32
12-
gpu.printf "Hello: %d\n" %arg0 : i32
12+
gpu.printf "Hello: %d\n", %arg0 : i32
1313
gpu.return
1414
}
1515
}

mlir/test/Conversion/GPUToSPIRV/printf.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ module attributes {
6262
// CHECK: [[FMTSTR_ADDR:%.*]] = spirv.mlir.addressof [[PRINTMSG]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant>
6363
// CHECK-NEXT: [[FMTSTR_PTR1:%.*]] = spirv.Bitcast [[FMTSTR_ADDR]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant> to !spirv.ptr<i8, UniformConstant>
6464
// CHECK-NEXT: {{%.*}} = spirv.CL.printf [[FMTSTR_PTR1]] {{%.*}}, {{%.*}}, {{%.*}} : !spirv.ptr<i8, UniformConstant>, i32, f32, i32 -> i32
65-
gpu.printf "\nHello, world : %d %f \n Thread id: %d\n" %arg0, %arg1, %2: i32, f32, index
65+
gpu.printf "\nHello, world : %d %f \n Thread id: %d\n", %arg0, %arg1, %2: i32, f32, index
6666

6767
// CHECK: spirv.Return
6868
gpu.return

mlir/test/Dialect/GPU/indirect-device-func-call.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ gpu.module @kernels {
66
func.func @hello(%arg0 : f32) {
77
%tid_x = gpu.thread_id x
88
%csti8 = arith.constant 2 : i8
9-
gpu.printf "Hello from %lld, %d, %f\n" %tid_x, %csti8, %arg0 : index, i8, f32
9+
gpu.printf "Hello from %lld, %d, %f\n", %tid_x, %csti8, %arg0 : index, i8, f32
1010
return
1111
}
1212
// CHECK-LABEL: @hello_indirect

mlir/test/Dialect/GPU/ops.mlir

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -229,9 +229,22 @@ module attributes {gpu.container_module} {
229229

230230
// CHECK-LABEL: gpu.func @printf_test
231231
// CHECK: (%[[ARG0:.*]]: i32)
232-
// CHECK: gpu.printf "Value: %d" %[[ARG0]] : i32
232+
// CHECK: gpu.printf "Value: %d", %[[ARG0]] : i32
233233
gpu.func @printf_test(%arg0 : i32) {
234-
gpu.printf "Value: %d" %arg0 : i32
234+
gpu.printf "Value: %d", %arg0 : i32
235+
gpu.return
236+
}
237+
238+
// CHECK-LABEL: gpu.func @printf_empty
239+
// CHECK: gpu.printf "]"
240+
// CHECK: scf.if
241+
// CHECK: gpu.printf ", "
242+
gpu.func @printf_empty(%arg0 : i32) {
243+
gpu.printf "]"
244+
%1 = arith.cmpi slt, %arg0, %arg0 : i32
245+
scf.if %1 {
246+
gpu.printf ", "
247+
}
235248
gpu.return
236249
}
237250

mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ func.func @test_math(%arg0 : f32) {
2323
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
2424
// CHECK-NVVM: __nv_expf
2525
%s1 = math.exp %arg0 : f32
26-
gpu.printf "%f" %s1 : f32
26+
gpu.printf "%f", %s1 : f32
2727
gpu.terminator
2828
}
2929
return

mlir/test/Integration/GPU/CUDA/assert.mlir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,10 @@ gpu.module @kernels {
1616
gpu.func @test_assert(%c0: i1, %c1: i1) kernel {
1717
%0 = gpu.thread_id x
1818
cf.assert %c1, "passing assertion"
19-
gpu.printf "thread %lld: print after passing assertion\n" %0 : index
19+
gpu.printf "thread %lld: print after passing assertion\n", %0 : index
2020
// Test callsite(callsite(name)) location.
2121
cf.assert %c0, "failing assertion" loc(callsite(callsite("callee_func_name"("callee_file.cc":7:9) at "caller_file.cc":10:8) at "caller2_file.cc":11:12))
22-
gpu.printf "thread %lld: print after failing assertion\n" %0 : index
22+
gpu.printf "thread %lld: print after failing assertion\n", %0 : index
2323
gpu.return
2424
}
2525
}

mlir/test/Integration/GPU/CUDA/printf.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ module attributes {gpu.container_module} {
1414
%0 = gpu.thread_id x
1515
%csti8 = arith.constant 2 : i8
1616
%cstf32 = arith.constant 3.0 : f32
17-
gpu.printf "Hello from %lld, %d, %f\n" %0, %csti8, %cstf32 : index, i8, f32
17+
gpu.printf "Hello from %lld, %d, %f\n", %0, %csti8, %cstf32 : index, i8, f32
1818
gpu.return
1919
}
2020
}

mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ module attributes {gpu.container_module} {
4343
%cnd2 = arith.cmpi eq, %bidY, %c3 : index
4444
scf.if %cnd1 {
4545
scf.if %cnd2 {
46-
gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n"
46+
gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n",
4747
%cidX_i32,
4848
%cidY_i32,
4949
%cidZ_i32,

mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ module @mymod {
8585

8686
// Step 7. First thread does TMA load
8787
scf.if %10 {
88-
gpu.printf "[GPU] TMA SIZE %d\0A" %c8192 : index
88+
gpu.printf "[GPU] TMA SIZE %d\0A", %c8192 : index
8989
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : !lhsTensorMap, !barrierType -> !shmemlhs
9090
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c8192 : !barrierType
9191
} else {
@@ -98,16 +98,16 @@ module @mymod {
9898

9999
// Step 9. Print loaded data in 128b swizzled
100100
scf.if %10 {
101-
gpu.printf "===--- Matrix A ---=== %d \0A" %c-1_i32 : i32
101+
gpu.printf "===--- Matrix A ---=== %d \0A", %c-1_i32 : i32
102102
scf.for %arg12 = %c0 to %c128 step %c1 {
103103
scf.for %arg13 = %c0 to %c64 step %c1 {
104104
%15 = memref.load %7[%arg12, %arg13] : !shmemlhs
105105
%16 = arith.extf %15 : f16 to f32
106-
gpu.printf "%.0f, " %16 : f32
106+
gpu.printf "%.0f, ", %16 : f32
107107
}
108-
gpu.printf "%d\0A" %c-1_i32 : i32
108+
gpu.printf "%d\0A", %c-1_i32 : i32
109109
}
110-
gpu.printf "===----------------=== %d \0A" %c-1_i32 : i32
110+
gpu.printf "===----------------=== %d \0A", %c-1_i32 : i32
111111
}
112112
gpu.terminator
113113
}

mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ module @mymod {
109109

110110
// Step 6. First thread does TMA load
111111
scf.if %10 {
112-
gpu.printf "[GPU] TMA SIZE %d\0A" %c32768 : index
112+
gpu.printf "[GPU] TMA SIZE %d\0A", %c32768 : index
113113
nvgpu.tma.async.load %d_lhsTensorMap[%c0, %c0], %9[%c0] to %lhsShmem : !lhsTensorMap, !barrierType -> !shmemlhs
114114
nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem1 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1]>, 3>
115115
nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 4096>, 3>
@@ -124,7 +124,7 @@ module @mymod {
124124

125125
// Step 8. Print loaded data in 128b swizzled
126126
scf.if %10 {
127-
gpu.printf "===--- Matrix B ---=== %d \n" %c-1_i32 : i32
127+
gpu.printf "===--- Matrix B ---=== %d \n", %c-1_i32 : i32
128128
scf.for %ii = %c0 to %c64 step %c1 {
129129
scf.for %j = %c0 to %c128 step %c1 {
130130
%lhs0 = memref.load %rhsShmem[%ii, %j] : !shmemrhs
@@ -133,7 +133,7 @@ module @mymod {
133133
}
134134
gpu.printf "%d\n" %c-1_i32 : i32
135135
}
136-
gpu.printf "===----------------=== %d \n" %c-1_i32 : i32
136+
gpu.printf "===----------------=== %d \n", %c-1_i32 : i32
137137
}
138138
gpu.barrier
139139
gpu.terminator

mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,8 +80,8 @@ module @mymod {
8080
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c6144 : <memorySpace = #gpu.address_space<workgroup>>
8181
%11 = memref.load %7[%c0, %c0] : memref<64x8xf32, 3>
8282
%12 = memref.load %8[%c0, %c0] : memref<8x128xf32, 3>
83-
gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A" %11 : f32
84-
gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A" %12 : f32
83+
gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A", %11 : f32
84+
gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A", %12 : f32
8585
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : <tensor = memref<64x8xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<64x8xf32, 3>
8686
nvgpu.tma.async.load %4[%c0, %c0], %9[%c0] to %8 : <tensor = memref<8x128xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<8x128xf32, 3>
8787
} else {
@@ -92,8 +92,8 @@ module @mymod {
9292
scf.if %10 {
9393
%11 = memref.load %7[%c45, %c7] : memref<64x8xf32, 3>
9494
%12 = memref.load %8[%c7, %c0] : memref<8x128xf32, 3>
95-
gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
96-
gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
95+
gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
96+
gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
9797
}
9898
gpu.terminator
9999
}

mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,8 +96,8 @@ func.func @main() {
9696
scf.if %10 {
9797
%11 = memref.load %out[%c45, %c7] : memref<64x8xf32, 3>
9898
%12 = memref.load %out_1[%c7, %c0] : memref<8x128xf32, 3>
99-
gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
100-
gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
99+
gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
100+
gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
101101
}
102102
gpu.terminator
103103
}

mlir/test/Integration/GPU/ROCM/printf.mlir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ module attributes {gpu.container_module} {
1313
gpu.module @kernels {
1414
gpu.func @hello() kernel {
1515
%0 = gpu.thread_id x
16-
gpu.printf "Hello from %d\n" %0 : index
16+
gpu.printf "Hello from %d\n", %0 : index
1717
gpu.return
1818
}
1919
}

0 commit comments

Comments
 (0)