|
| 1 | +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV |
| 2 | +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} |
| 3 | + |
| 4 | +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV |
| 5 | +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} |
| 6 | + |
| 7 | +; CHECK-SPIRV: OpCapability Groups |
| 8 | +; CHECK-SPIRV-DAG: %[[#Int32Ty:]] = OpTypeInt 32 0 |
| 9 | +; CHECK-SPIRV-DAG: %[[#Int64Ty:]] = OpTypeInt 64 0 |
| 10 | +; CHECK-SPIRV-DAG: %[[#Float32Ty:]] = OpTypeFloat 32 |
| 11 | +; CHECK-SPIRV-DAG: %[[#Vec2Int32Ty:]] = OpTypeVector %[[#Int32Ty]] 2 |
| 12 | +; CHECK-SPIRV-DAG: %[[#Vec3Int32Ty:]] = OpTypeVector %[[#Int32Ty]] 3 |
| 13 | +; CHECK-SPIRV-DAG: %[[#Vec2Int64Ty:]] = OpTypeVector %[[#Int64Ty]] 2 |
| 14 | +; CHECK-SPIRV-DAG: %[[#C2:]] = OpConstant %[[#Int32Ty]] 2 |
| 15 | + |
| 16 | +; CHECK-SPIRV: OpFunction |
| 17 | +; CHECK-SPIRV: %[[#Val:]] = OpFunctionParameter %[[#Int32Ty]] |
| 18 | +; CHECK-SPIRV: %[[#X:]] = OpFunctionParameter %[[#Int32Ty]] |
| 19 | +; CHECK-SPIRV: %[[#Y:]] = OpFunctionParameter %[[#Int32Ty]] |
| 20 | +; CHECK-SPIRV: %[[#Z:]] = OpFunctionParameter %[[#Int32Ty]] |
| 21 | +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#Int32Ty]] %[[#C2]] %[[#Val]] %[[#X]] |
| 22 | +; CHECK-SPIRV: %[[#XY:]] = OpCompositeConstruct %[[#Vec2Int32Ty]] %[[#X]] %[[#Y]] |
| 23 | +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#Int32Ty]] %[[#C2]] %[[#Val]] %[[#XY]] |
| 24 | +; CHECK-SPIRV: %[[#XYZ:]] = OpCompositeConstruct %[[#Vec3Int32Ty]] %[[#X]] %[[#Y]] %[[#Z]] |
| 25 | +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#Int32Ty]] %[[#C2]] %[[#Val]] %[[#XYZ]] |
| 26 | +define spir_kernel void @test_broadcast_xyz(i32 noundef %a, i32 noundef %x, i32 noundef %y, i32 noundef %z) { ; broadcasts %a using one, two and three local-id coordinates; all results are unused |
| 27 | +entry: |
| 28 | + %call1 = call spir_func i32 @_Z20work_group_broadcastjj(i32 noundef %a, i32 noundef %x) ; value + scalar id (%x) |
| 29 | + %call2 = call spir_func i32 @_Z20work_group_broadcastjjj(i32 noundef %a, i32 noundef %x, i32 noundef %y) ; value + (%x, %y): callee must be the 3-parameter symbol (one 'j' per i32 argument) |
| 30 | + %call3 = call spir_func i32 @_Z20work_group_broadcastjjjj(i32 noundef %a, i32 noundef %x, i32 noundef %y, i32 noundef %z) ; value + (%x, %y, %z): callee must be the 4-parameter symbol |
| 31 | + ret void |
| 32 | +} |
| 33 | + |
| 34 | +declare spir_func i32 @_Z20work_group_broadcastjj(i32, i32) ; Itanium-mangled work_group_broadcast; each trailing 'j' encodes one unsigned int parameter (2 here) |
| 35 | +declare spir_func i32 @_Z20work_group_broadcastjjj(i32, i32, i32) ; same base name, 3 unsigned int parameters |
| 36 | +declare spir_func i32 @_Z20work_group_broadcastjjjj(i32, i32, i32, i32) ; same base name, 4 unsigned int parameters |
| 37 | + |
| 38 | +; CHECK-SPIRV: OpFunction |
| 39 | +; CHECK-SPIRV: OpInBoundsPtrAccessChain |
| 40 | +; CHECK-SPIRV: %[[#LoadedVal:]] = OpLoad %[[#Float32Ty]] %[[#]] |
| 41 | +; CHECK-SPIRV: %[[#IdX:]] = OpCompositeExtract %[[#Int64Ty]] %[[#]] 0 |
| 42 | +; CHECK-SPIRV: %[[#IdY:]] = OpCompositeExtract %[[#Int64Ty]] %[[#]] 1 |
| 43 | +; CHECK-SPIRV: %[[#LocIdsVec:]] = OpCompositeConstruct %[[#Vec2Int64Ty]] %[[#IdX]] %[[#IdY]] |
| 44 | +; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#Float32Ty]] %[[#C2]] %[[#LoadedVal]] %[[#LocIdsVec]] |
| 45 | +define spir_kernel void @test_wg_broadcast_2D(ptr addrspace(1) %input, ptr addrspace(1) %output) #0 !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_type_qual !10 !kernel_arg_base_type !9 !spirv.ParameterDecorations !11 { ; loads %input[%add], broadcasts it through @_Z20work_group_broadcastfmm, stores the result to %output[%add] |
| 46 | +entry: |
| 47 | + %0 = call spir_func i64 @_Z13get_global_idj(i32 0) #1 |
| 48 | + %1 = insertelement <3 x i64> undef, i64 %0, i32 0 |
| 49 | + %2 = call spir_func i64 @_Z13get_global_idj(i32 1) #1 |
| 50 | + %3 = insertelement <3 x i64> %1, i64 %2, i32 1 |
| 51 | + %4 = call spir_func i64 @_Z13get_global_idj(i32 2) #1 |
| 52 | + %5 = insertelement <3 x i64> %3, i64 %4, i32 2 |
| 53 | + %call = extractelement <3 x i64> %5, i32 0 ; lane 0 of the vector above, i.e. %0 = @_Z13get_global_idj(0) |
| 54 | + %6 = call spir_func i64 @_Z13get_global_idj(i32 0) #1 |
| 55 | + %7 = insertelement <3 x i64> undef, i64 %6, i32 0 |
| 56 | + %8 = call spir_func i64 @_Z13get_global_idj(i32 1) #1 |
| 57 | + %9 = insertelement <3 x i64> %7, i64 %8, i32 1 |
| 58 | + %10 = call spir_func i64 @_Z13get_global_idj(i32 2) #1 |
| 59 | + %11 = insertelement <3 x i64> %9, i64 %10, i32 2 |
| 60 | + %call1 = extractelement <3 x i64> %11, i32 1 ; lane 1, i.e. %8 = @_Z13get_global_idj(1) |
| 61 | + %12 = call spir_func i64 @_Z12get_group_idj(i32 0) #1 |
| 62 | + %13 = insertelement <3 x i64> undef, i64 %12, i32 0 |
| 63 | + %14 = call spir_func i64 @_Z12get_group_idj(i32 1) #1 |
| 64 | + %15 = insertelement <3 x i64> %13, i64 %14, i32 1 |
| 65 | + %16 = call spir_func i64 @_Z12get_group_idj(i32 2) #1 |
| 66 | + %17 = insertelement <3 x i64> %15, i64 %16, i32 2 |
| 67 | + %call2 = extractelement <3 x i64> %17, i32 0 ; lane 0, i.e. %12 = @_Z12get_group_idj(0) |
| 68 | + %18 = call spir_func i64 @_Z14get_local_sizej(i32 0) #1 |
| 69 | + %19 = insertelement <3 x i64> undef, i64 %18, i32 0 |
| 70 | + %20 = call spir_func i64 @_Z14get_local_sizej(i32 1) #1 |
| 71 | + %21 = insertelement <3 x i64> %19, i64 %20, i32 1 |
| 72 | + %22 = call spir_func i64 @_Z14get_local_sizej(i32 2) #1 |
| 73 | + %23 = insertelement <3 x i64> %21, i64 %22, i32 2 |
| 74 | + %call3 = extractelement <3 x i64> %23, i32 0 ; lane 0, i.e. %18 = @_Z14get_local_sizej(0) |
| 75 | + %rem = urem i64 %call2, %call3 ; first broadcast coordinate: group id 0 modulo local size 0 |
| 76 | + %24 = call spir_func i64 @_Z12get_group_idj(i32 0) #1 |
| 77 | + %25 = insertelement <3 x i64> undef, i64 %24, i32 0 |
| 78 | + %26 = call spir_func i64 @_Z12get_group_idj(i32 1) #1 |
| 79 | + %27 = insertelement <3 x i64> %25, i64 %26, i32 1 |
| 80 | + %28 = call spir_func i64 @_Z12get_group_idj(i32 2) #1 |
| 81 | + %29 = insertelement <3 x i64> %27, i64 %28, i32 2 |
| 82 | + %call4 = extractelement <3 x i64> %29, i32 1 ; lane 1, i.e. %26 = @_Z12get_group_idj(1) |
| 83 | + %30 = call spir_func i64 @_Z14get_local_sizej(i32 0) #1 |
| 84 | + %31 = insertelement <3 x i64> undef, i64 %30, i32 0 |
| 85 | + %32 = call spir_func i64 @_Z14get_local_sizej(i32 1) #1 |
| 86 | + %33 = insertelement <3 x i64> %31, i64 %32, i32 1 |
| 87 | + %34 = call spir_func i64 @_Z14get_local_sizej(i32 2) #1 |
| 88 | + %35 = insertelement <3 x i64> %33, i64 %34, i32 2 |
| 89 | + %call5 = extractelement <3 x i64> %35, i32 1 ; lane 1, i.e. %32 = @_Z14get_local_sizej(1) |
| 90 | + %rem6 = urem i64 %call4, %call5 ; second broadcast coordinate: group id 1 modulo local size 1 |
| 91 | + %36 = call spir_func i64 @_Z15get_global_sizej(i32 0) #1 |
| 92 | + %37 = insertelement <3 x i64> undef, i64 %36, i32 0 |
| 93 | + %38 = call spir_func i64 @_Z15get_global_sizej(i32 1) #1 |
| 94 | + %39 = insertelement <3 x i64> %37, i64 %38, i32 1 |
| 95 | + %40 = call spir_func i64 @_Z15get_global_sizej(i32 2) #1 |
| 96 | + %41 = insertelement <3 x i64> %39, i64 %40, i32 2 |
| 97 | + %call7 = extractelement <3 x i64> %41, i32 0 ; lane 0, i.e. %36 = @_Z15get_global_sizej(0) |
| 98 | + %mul = mul i64 %call1, %call7 |
| 99 | + %add = add i64 %mul, %call ; element index = global id 1 * global size 0 + global id 0 |
| 100 | + %arrayidx = getelementptr inbounds float, ptr addrspace(1) %input, i64 %add |
| 101 | + %42 = load float, ptr addrspace(1) %arrayidx, align 4 ; the value that gets broadcast |
| 102 | + %.splatinsert = insertelement <2 x i64> undef, i64 %rem, i32 0 |
| 103 | + %.splat = shufflevector <2 x i64> %.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ; all-zero mask copies lane 0 (%rem) into both lanes |
| 104 | + %43 = insertelement <2 x i64> %.splat, i64 %rem6, i32 1 ; lane 1 overwritten, so %43 = <%rem, %rem6> |
| 105 | + %44 = extractelement <2 x i64> %43, i32 0 |
| 106 | + %45 = extractelement <2 x i64> %43, i32 1 |
| 107 | + %call8 = call spir_func float @_Z20work_group_broadcastfmm(float %42, i64 %44, i64 %45) #2 ; broadcast of %42 with the two coordinates taken from %43 |
| 108 | + %arrayidx9 = getelementptr inbounds float, ptr addrspace(1) %output, i64 %add |
| 109 | + store float %call8, ptr addrspace(1) %arrayidx9, align 4 ; written back at the same index the input was read from |
| 110 | + ret void |
| 111 | +} |
| 112 | + |
| 113 | +; Function Attrs: nounwind willreturn memory(none) |
| 114 | +declare spir_func i64 @_Z13get_global_idj(i32) #1 ; Itanium-mangled get_global_id(unsigned int); the i32 argument selects the dimension (0, 1 or 2 at the call sites) |
| 115 | + |
| 116 | +; Function Attrs: nounwind willreturn memory(none) |
| 117 | +declare spir_func i64 @_Z12get_group_idj(i32) #1 ; Itanium-mangled get_group_id(unsigned int) |
| 118 | + |
| 119 | +; Function Attrs: nounwind willreturn memory(none) |
| 120 | +declare spir_func i64 @_Z14get_local_sizej(i32) #1 ; Itanium-mangled get_local_size(unsigned int) |
| 121 | + |
| 122 | +; Function Attrs: nounwind willreturn memory(none) |
| 123 | +declare spir_func i64 @_Z15get_global_sizej(i32) #1 ; Itanium-mangled get_global_size(unsigned int) |
| 124 | + |
| 125 | +; Function Attrs: convergent nounwind |
| 126 | +declare spir_func float @_Z20work_group_broadcastfmm(float, i64, i64) #2 ; Itanium-mangled work_group_broadcast(float, unsigned long, unsigned long): 'f' = float, 'm' = unsigned long |
| 127 | + |
| 128 | +attributes #0 = { nounwind } |
| 129 | +attributes #1 = { nounwind willreturn memory(none) } |
| 130 | +attributes #2 = { convergent nounwind } |
| 131 | + |
| 132 | +!spirv.MemoryModel = !{!0} |
| 133 | +!opencl.enable.FP_CONTRACT = !{} |
| 134 | +!spirv.Source = !{!1} |
| 135 | +!opencl.spir.version = !{!2} |
| 136 | +!opencl.ocl.version = !{!3} |
| 137 | +!opencl.used.extensions = !{!4} |
| 138 | +!opencl.used.optional.core.features = !{!5} |
| 139 | +!spirv.Generator = !{!6} |
| 140 | + |
| 141 | +!0 = !{i32 2, i32 2} |
| 142 | +!1 = !{i32 3, i32 300000} |
| 143 | +!2 = !{i32 2, i32 0} |
| 144 | +!3 = !{i32 3, i32 0} |
| 145 | +!4 = !{!"cl_khr_subgroups"} |
| 146 | +!5 = !{} |
| 147 | +!6 = !{i16 6, i16 14} |
| 148 | +!7 = !{i32 1, i32 1} |
| 149 | +!8 = !{!"none", !"none"} |
| 150 | +!9 = !{!"float*", !"float*"} |
| 151 | +!10 = !{!"", !""} |
| 152 | +!11 = !{!5, !5} |
0 commit comments