; RUN: opt -S -passes='early-cse<memssa>' %s -o %t
; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t
; Finish compiling to verify that dxil-op-lower removes the globals entirely.
; RUN: opt -mtriple=dxil-pc-shadermodel6.0-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: opt -mtriple=dxil-pc-shadermodel6.6-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s

; Ensure that EarlyCSE can eliminate redundant loads of resource globals across calls to typedBufferLoad.
; Also check that DXILOpLowering eliminates the globals entirely.
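;
; For reference, a rough HLSL source that this IR could correspond to. This is
; an illustrative assumption, not part of the original test: the bindings are
; inferred from the llvm.dx.handle.fromBinding arguments.
;
;   RWBuffer<float4> In : register(u0, space0);
;   RWBuffer<float4> Out : register(u1, space4);
;
;   [numthreads(8, 1, 1)]
;   void main(uint Index : SV_GroupIndex) {
;     Out[Index] = In[Index] + In[Index];
;   }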

%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }

; DXOP-NOT: @In = global
; DXOP-NOT: @Out = global
@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4

; CHECK-LABEL: define void @main()
define void @main() local_unnamed_addr #0 {
entry:
  ; DXOP: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle
  ; DXOP: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle
  %In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
  %Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
  ; CSE: call i32 @llvm.dx.flattened.thread.id.in.group()
  %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
  ; CHECK-NOT: load {{.*}} ptr @In
  %1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
  ; CSE: call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
  %2 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
  ; CHECK-NOT: load {{.*}} ptr @In
  %3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
  %4 = call noundef <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
  %add.i = fadd <4 x float> %2, %4
  call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
  ; CHECK: ret void
  ret void
}

; CSE-DAG: declare <4 x float> @llvm.dx.typedBufferLoad.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
; CSE-DAG: declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]

attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }

; The following check is only here to split up the CSE-DAG searches above and below it.
; CSE: attributes #0

; CSE-DAG: attributes [[ROAttr]] = { {{.*}} memory(read) }
; CSE-DAG: attributes [[WOAttr]] = { {{.*}} memory(write) }