Skip to content

Commit 835feaa

Browse files
authored
[DXIL] Add scalarization support for WaveReadLaneAt (#112570)
- Implement trivial scalarization for the `WaveReadLaneAt` DXIL intrinsic
- Add a test case to demonstrate the lowering path

Resolves #70104
1 parent 569ad7c commit 835feaa

File tree

2 files changed

+38
-0
lines changed

2 files changed

+38
-0
lines changed

llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ using namespace llvm;
1818
bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
1919
unsigned ScalarOpdIdx) {
2020
switch (ID) {
21+
case Intrinsic::dx_wave_readlane:
22+
return ScalarOpdIdx == 1;
2123
default:
2224
return false;
2325
}
@@ -28,6 +30,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
2830
switch (ID) {
2931
case Intrinsic::dx_frac:
3032
case Intrinsic::dx_rsqrt:
33+
case Intrinsic::dx_wave_readlane:
3134
return true;
3235
default:
3336
return false;
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
2+
3+
; Test that for vector values, WaveReadLaneAt scalarizes and maps down to the
4+
; DirectX op
5+
6+
; <2 x half> case: the CHECK lines expect one scalar dx.op.waveReadLaneAt.f16
; call per vector element (%expr.i0, %expr.i1), each reusing the same %idx.
; The intrinsic is called under its vector-mangled name (.v2f16) so that it
; matches the declaration at the end of this test.
define noundef <2 x half> @wave_read_lane_v2half(<2 x half> noundef %expr, i32 %idx) {
7+
entry:
8+
; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i0, i32 %idx)
9+
; CHECK: call half @dx.op.waveReadLaneAt.f16(i32 117, half %expr.i1, i32 %idx)
10+
%ret = call <2 x half> @llvm.dx.wave.readlane.v2f16(<2 x half> %expr, i32 %idx)
11+
ret <2 x half> %ret
12+
}
13+
14+
; <3 x i32> case: the CHECK lines expect one scalar dx.op.waveReadLaneAt.i32
; call per vector element (%expr.i0 .. %expr.i2), each reusing the same %idx.
; The intrinsic is called under its vector-mangled name (.v3i32) so that it
; matches the declaration at the end of this test.
define noundef <3 x i32> @wave_read_lane_v3i32(<3 x i32> noundef %expr, i32 %idx) {
15+
entry:
16+
; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i0, i32 %idx)
17+
; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i1, i32 %idx)
18+
; CHECK: call i32 @dx.op.waveReadLaneAt.i32(i32 117, i32 %expr.i2, i32 %idx)
19+
%ret = call <3 x i32> @llvm.dx.wave.readlane.v3i32(<3 x i32> %expr, i32 %idx)
20+
ret <3 x i32> %ret
21+
}
22+
23+
; <4 x double> case: the CHECK lines expect one scalar dx.op.waveReadLaneAt.f64
; call per vector element (%expr.i0 .. %expr.i3), each reusing the same %idx.
; The intrinsic is called under its vector-mangled name (.v4f64) so that it
; matches the declaration at the end of this test.
define noundef <4 x double> @wave_read_lane_v4f64(<4 x double> noundef %expr, i32 %idx) {
24+
entry:
25+
; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i0, i32 %idx)
26+
; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i1, i32 %idx)
27+
; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i2, i32 %idx)
28+
; CHECK: call double @dx.op.waveReadLaneAt.f64(i32 117, double %expr.i3, i32 %idx)
29+
%ret = call <4 x double> @llvm.dx.wave.readlane.v4f64(<4 x double> %expr, i32 %idx)
30+
ret <4 x double> %ret
31+
}
32+
33+
declare <2 x half> @llvm.dx.wave.readlane.v2f16(<2 x half>, i32)
34+
declare <3 x i32> @llvm.dx.wave.readlane.v3i32(<3 x i32>, i32)
35+
declare <4 x double> @llvm.dx.wave.readlane.v4f64(<4 x double>, i32)

0 commit comments

Comments
 (0)