|
| 1 | +;=========================== begin_copyright_notice ============================ |
| 2 | +; |
| 3 | +; Copyright (C) 2023 Intel Corporation |
| 4 | +; |
| 5 | +; SPDX-License-Identifier: MIT |
| 6 | +; |
| 7 | +;============================ end_copyright_notice ============================= |
| 8 | + |
| 9 | +; REQUIRES: regkeys |
| 10 | +; Both runs also enable the common (unsuffixed) prefix so that the @test label check and the final ret check are evaluated. |
| 11 | +; No SOA transpose |
| 12 | +; RUN: igc_opt --ocl --platformpvc --igc-private-mem-resolution --regkey EnablePrivMemNewSOATranspose=2 -S %s | FileCheck --check-prefixes=CHECK,CHECK-K2 %s |
| 13 | +; |
| 14 | +; SOA transpose on the entire struct |
| 15 | +; RUN: igc_opt --ocl --platformpvc --igc-private-mem-resolution --regkey EnablePrivMemNewSOATranspose=3 -S %s | FileCheck --check-prefixes=CHECK,CHECK-K3 %s |
| 16 | +; |
| 17 | + |
| 18 | +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32" |
| 19 | +target triple = "spir64-unknown-unknown" |
| 20 | + |
| 21 | +%struct.Packed = type <{ i8, i16, i8, float }> |
| 22 | + |
| 23 | +; CHECK-LABEL: @test |
| 24 | +;; |
| 25 | +;; prolog in entry block. Get buffer's perThreadOffset |
| 26 | +;; |
| 27 | +; CHECK-K2: [[T00:%.*]] = call i16 @llvm.genx.GenISA.simdLaneId() |
| 28 | +; CHECK-K2: [[simdLaneId:%.*]] = zext i16 [[T00]] to i32 |
| 29 | +; CHECK-K2: [[simdSize:%.*]] = call i32 @llvm.genx.GenISA.simdSize() |
| 30 | +; CHECK-K2: [[T01:%.*]] = call i32 @llvm.genx.GenISA.hw.thread.id.alloca.i32() |
| 31 | +; CHECK-K2: [[T02:%.*]] = mul i32 [[simdSize]], 8192 |
| 32 | +; CHECK-K2: [[perThreadOffset:%.*]] = mul i32 [[T01]], [[T02]] |
| 33 | +; |
| 34 | +;; No SOA transpose: the lane id is multiplied by 8192 (= 1024 elements x 8 bytes, the size of the whole array) |
| 35 | +;; |
| 36 | +; CHECK-K2: {{.*}} = mul i32 [[simdLaneId]], 8192 |
| 37 | +; |
| 38 | +;; |
| 39 | +;; |
| 40 | +; CHECK-K3: [[T00:%.*]] = call i16 @llvm.genx.GenISA.simdLaneId() |
| 41 | +; CHECK-K3: [[simdLaneId:%.*]] = zext i16 [[T00]] to i32 |
| 42 | +; CHECK-K3: [[simdSize:%.*]] = call i32 @llvm.genx.GenISA.simdSize() |
| 43 | +; CHECK-K3: [[T01:%.*]] = call i32 @llvm.genx.GenISA.hw.thread.id.alloca.i32() |
| 44 | +; CHECK-K3: [[T02:%.*]] = mul i32 [[simdSize]], 8192 |
| 45 | +; CHECK-K3: [[perThreadOffset:%.*]] = mul i32 [[T01]], [[T02]] |
| 46 | +; |
| 47 | +;; SOA transpose for the entire packed struct: the lane id is multiplied by 8 (= 1 + 2 + 1 + 4 bytes, the size of one %struct.Packed) |
| 48 | +;; |
| 49 | +; CHECK-K3: {{.*}} = mul i32 [[simdLaneId]], 8 |
| 50 | + |
| 51 | +; Kernel under test. %pb is an alloca of 1024 x %struct.Packed (8192 bytes per the lifetime markers); %idx = enqueuedLocalSize[0] * r0[1] + localIdX + payloadHeader[0] (presumably the global id in X - confirm), and the kernel stores an i16 to field 1 and a float to field 3 of element %idx. Function Attrs: nofree nosync nounwind |
| 52 | +define spir_kernel void @test(i32 addrspace(1)* nocapture writeonly %d, <8 x i32> %r0, <8 x i32> %payloadHeader, <3 x i32> %enqueuedLocalSize, i16 %localIdX, i16 %localIdY, i16 %localIdZ, i8* nocapture readnone %privateBase) { |
| 53 | +entry: |
| 54 | + %payloadHeader.scalar = extractelement <8 x i32> %payloadHeader, i64 0 |
| 55 | + %enqueuedLocalSize.scalar = extractelement <3 x i32> %enqueuedLocalSize, i64 0 |
| 56 | + %r0.scalar18 = extractelement <8 x i32> %r0, i64 1 |
| 57 | + %pb = alloca [1024 x %struct.Packed ], align 4 |
| 58 | + %tmp0 = mul i32 %enqueuedLocalSize.scalar, %r0.scalar18 |
| 59 | + %localIdX3 = zext i16 %localIdX to i32 |
| 60 | + %tmp1 = add i32 %tmp0, %localIdX3 |
| 61 | + %ix = add i32 %tmp1, %payloadHeader.scalar |
| 62 | + %idx = zext i32 %ix to i64 |
| 63 | + %tmp2 = bitcast [1024 x %struct.Packed ]* %pb to i8* |
| 64 | + call void @llvm.lifetime.start.p0i8(i64 8192, i8* nonnull %tmp2) |
| 65 | + %staddr0 = getelementptr inbounds [1024 x %struct.Packed ], [1024 x %struct.Packed ]* %pb, i64 0, i64 %idx, i32 1 |
| 66 | + store i16 1, i16* %staddr0, align 1 |
| 67 | + %staddr1 = getelementptr inbounds [1024 x %struct.Packed ], [1024 x %struct.Packed ]* %pb, i64 0, i64 %idx, i32 3 |
| 68 | + store float 0.000000e+00, float* %staddr1, align 4 |
| 69 | + call void @llvm.lifetime.end.p0i8(i64 8192, i8* nonnull %tmp2) |
| 70 | +; The ret check below uses the unsuffixed prefix, so it is only evaluated when a run enables that prefix. |
| 71 | +; CHECK: ret |
| 72 | + ret void |
| 73 | +} |
| 74 | + |
| 75 | +; Lifetime-start marker intrinsic; @test calls it with size 8192 on the i8* view of %pb before the stores. Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn |
| 76 | +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) |
| 77 | + |
| 78 | +; Lifetime-end marker intrinsic; @test calls it with size 8192 on the same pointer after the stores. Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn |
| 79 | +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) |
| 80 | + |
| 81 | +!IGCMetadata = !{!0} |
| 82 | +!igc.functions = !{!6} |
| 83 | + |
| 84 | +!0 = !{!"ModuleMD", !1, !3} |
| 85 | +!1 = !{!"compOpt", !2} |
| 86 | +!2 = !{!"UseScratchSpacePrivateMemory", i1 true} |
| 87 | +!3 = !{!"FuncMD", !4, !5} |
| 88 | +!4 = !{!"FuncMDMap[1]", void (i32 addrspace(1)*, <8 x i32>, <8 x i32>, <3 x i32>, i16, i16, i16, i8*)* @test} |
| 89 | +!5 = !{!"FuncMDValue[1]", !2} |
| 90 | +!6 = !{void (i32 addrspace(1)*, <8 x i32>, <8 x i32>, <3 x i32>, i16, i16, i16, i8*)* @test, !7} |
| 91 | +!7 = !{!8, !9} |
| 92 | +!8 = !{!"function_type", i32 0} |
| 93 | +!9 = !{!"implicit_arg_desc", !10, !11, !12, !13, !14, !15, !16} |
| 94 | +!10 = !{i32 0} |
| 95 | +!11 = !{i32 1} |
| 96 | +!12 = !{i32 6} |
| 97 | +!13 = !{i32 7} |
| 98 | +!14 = !{i32 8} |
| 99 | +!15 = !{i32 9} |
| 100 | +!16 = !{i32 12} |
0 commit comments