Skip to content

Commit 79399a9

Browse files
farzonl authored and Jaddyen committed
[DirectX] make firstbitlow intrinsic use first argument instead of return for overload type (llvm#145350)
Fixes llvm#144966. Easy fix: just add `dx_firstbitlow` to `DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg`.
1 parent cac8d79 commit 79399a9

File tree

2 files changed

+18
-4
lines changed

2 files changed

+18
-4
lines changed

llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
3030
switch (ID) {
3131
case Intrinsic::dx_asdouble:
3232
case Intrinsic::dx_isinf:
33+
case Intrinsic::dx_firstbitlow:
3334
return OpdIdx == 0;
3435
default:
3536
return OpdIdx == -1;

llvm/test/CodeGen/DirectX/firstbitlow.ll

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,20 @@ entry:
4141
ret <4 x i32> %2
4242
}
4343

44-
declare i32 @llvm.dx.firstbitlow.i16(i16)
45-
declare i32 @llvm.dx.firstbitlow.i32(i32)
46-
declare i32 @llvm.dx.firstbitlow.i64(i64)
47-
declare <4 x i32> @llvm.dx.firstbitlow.v4i32(<4 x i32>)
44+
define noundef <4 x i32> @test_firstbitlow_vec4_i16(<4 x i16> noundef %a) {
45+
entry:
46+
; CHECK: [[ee0:%.*]] = extractelement <4 x i16> %a, i64 0
47+
; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee0]])
48+
; CHECK: [[ee1:%.*]] = extractelement <4 x i16> %a, i64 1
49+
; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee1]])
50+
; CHECK: [[ee2:%.*]] = extractelement <4 x i16> %a, i64 2
51+
; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee2]])
52+
; CHECK: [[ee3:%.*]] = extractelement <4 x i16> %a, i64 3
53+
; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i16(i32 32, i16 [[ee3]])
54+
; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
55+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
56+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
57+
; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3
58+
%2 = call <4 x i32> @llvm.dx.firstbitlow.v4i16(<4 x i16> %a)
59+
ret <4 x i32> %2
60+
}

0 commit comments

Comments
 (0)