Skip to content

Commit cbd1d45

Browse files
igorban-intel authored and igcbot committed
Lower llvm.bitreverse.i64 to 2 i32
.
1 parent 590830f commit cbd1d45

File tree

2 files changed

+101
-6
lines changed

2 files changed

+101
-6
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXLowering.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ class GenXLowering : public FunctionPass {
252252
bool lowerDebugTrap(CallInst *CI);
253253
bool lowerFMulAdd(CallInst *CI);
254254
bool lowerAddcSubb(CallInst *CI, unsigned IntrinsicID);
255+
bool lower64Bitreverse(CallInst *CI);
255256
bool lowerBitreverse(CallInst *CI);
256257
bool lowerFunnelShift(CallInst *CI, unsigned IntrinsicID);
257258
bool lowerMathIntrinsic(CallInst *CI, GenXIntrinsic::ID GenXID,
@@ -4218,6 +4219,31 @@ bool GenXLowering::lowerLzd(Instruction *Inst) {
42184219
return true;
42194220
}
42204221

4222+
/// Lowers an llvm.bitreverse call on 64-bit elements into two 32-bit
/// llvm.bitreverse calls.
///
///   %res = call i64 @llvm.bitreverse.i64(i64 %in)
/// becomes
///   {inH, inL} = split %in into 32-bit halves
///   inRL = bitreverse.32 inL
///   inRH = bitreverse.32 inH
///   res  = combine {Hi = inRL, Lo = inRH}
///
/// Bit i of the input has to end up in bit 63 - i of the result, so besides
/// reversing each half the halves themselves must trade places: the reversed
/// low half becomes the high half of the result and vice versa.
///
/// \param CI the llvm.bitreverse call being lowered.
/// \returns true; all uses of CI are redirected to the recombined value and
///          CI is queued in ToErase.
///
/// NOTE(review): the visible caller dispatches here for every element width
/// above 32 bits; this routine presumably handles exactly 64 -- confirm that
/// other widths cannot reach it.
bool GenXLowering::lower64Bitreverse(CallInst *CI) {
  auto *InType = CI->getType();
  IRBuilder<> IRB{CI};
  auto Split = IVSplitter(*CI).splitValueLoHi(*CI->getOperand(0));
  auto *ResTy = Split.Lo->getType();
  Value *LoReverse =
      IRB.CreateIntrinsic(Intrinsic::bitreverse, {ResTy}, {Split.Lo});
  Value *HiReverse =
      IRB.CreateIntrinsic(Intrinsic::bitreverse, {ResTy}, {Split.Hi});
  // Exchange the halves while recombining: the new low part is the reversed
  // high part, the new high part is the reversed low part. (Assumes the
  // initializer order is {Lo, Hi}, matching the Lo/Hi members returned by
  // splitValueLoHi.)
  Value *Result = IVSplitter(*CI).combineLoHiSplit(
      {HiReverse, LoReverse}, CI->getName() + ".", InType->isIntegerTy());

  CI->replaceAllUsesWith(Result);
  ToErase.push_back(CI);
  return true;
}
4246+
42214247
// %1 = call i8 @llvm.bitreverse.i8(i8 %in)
42224248
// to
42234249
// %1.zext = zext i8 %in to i32
@@ -4244,11 +4270,7 @@ bool GenXLowering::lowerBitreverse(CallInst *CI) {
42444270
auto OriginalElementBitSize = OriginalType->getScalarSizeInBits();
42454271
int ShiftSize = 32 - OriginalElementBitSize;
42464272
if (ShiftSize < 0) {
4247-
DiagnosticInfoLowering Err(CI,
4248-
"currently llvm.bitreverse with bitsize bigger "
4249-
"than 32 is not supported",
4250-
DS_Error);
4251-
CI->getContext().diagnose(Err);
4273+
return lower64Bitreverse(CI);
42524274
}
42534275
Value *ShiftSizeVal = ConstantInt::get(BfrevType, ShiftSize);
42544276
IRBuilder<> Builder(CI);

IGC/VectorCompiler/test/Lowering/llvm-intrinsics.ll

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,43 @@ define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %input)
4949
ret <8 x i32> %ret
5050
}
5151

52+
53+
declare i64 @llvm.bitreverse.i64(i64)
54+
declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>)
55+
56+
; Scalar i64 llvm.bitreverse: the input is split into low/high dwords, each
; dword is reversed with bfrev, and the results are written back with the
; halves exchanged (reversed high dword at byte offset 0, reversed low dword
; at byte offset 4) so that bit i of the input ends up in bit 63 - i.
; CHECK-LABEL: test_bitreverse_i64
define i64 @test_bitreverse_i64(i64 %input)
{
; CHECK: [[CAST:%[^ ]+]] = bitcast i64 %input to <2 x i32>
; CHECK: [[LO_SPLIT:%[^ ]+]] = call <1 x i32> @llvm.genx.rdregioni.v1i32.v2i32.i16(<2 x i32> [[CAST]], i32 0, i32 1, i32 2, i16 0
; CHECK: [[HI_SPLIT:%[^ ]+]] = call <1 x i32> @llvm.genx.rdregioni.v1i32.v2i32.i16(<2 x i32> [[CAST]], i32 0, i32 1, i32 2, i16 4
; CHECK: [[REV_LO:%[^ ]+]] = call <1 x i32> @llvm.genx.bfrev.v1i32(<1 x i32> [[LO_SPLIT]])
; CHECK: [[REV_HI:%[^ ]+]] = call <1 x i32> @llvm.genx.bfrev.v1i32(<1 x i32> [[HI_SPLIT]])
; CHECK: [[JOIN_LO:%[^ ]+]] = call <2 x i32> @llvm.genx.wrregioni.v2i32.v1i32.i16.i1(<2 x i32> undef, <1 x i32> [[REV_HI]], i32 0, i32 1, i32 2, i16 0
; CHECK: [[JOIN_HI_LO:%[^ ]+]] = call <2 x i32> @llvm.genx.wrregioni.v2i32.v1i32.i16.i1(<2 x i32> [[JOIN_LO]], <1 x i32> [[REV_LO]], i32 0, i32 1, i32 2, i16 4
; CHECK: [[RET:%[^ ]+]] = bitcast <2 x i32> [[JOIN_HI_LO]] to <1 x i64>
; CHECK: [[RET_FINAL:%[^ ]+]] = bitcast <1 x i64> [[RET]] to i64
; CHECK: ret i64 [[RET_FINAL]]
  %ret = call i64 @llvm.bitreverse.i64(i64 %input);
  ret i64 %ret
}
72+
73+
; Vector <8 x i64> llvm.bitreverse: same scheme as the scalar case, applied to
; the eight low dwords and the eight high dwords. The reversed high dwords are
; written at byte offset 0 and the reversed low dwords at byte offset 4, i.e.
; the halves of every element are exchanged.
; CHECK-LABEL: test_bitreverse_v8i64
define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %input)
{
; CHECK: [[CAST:%[^ ]+]] = bitcast <8 x i64> %input to <16 x i32>
; CHECK: [[LO_SPLIT:%[^ ]+]] = call <8 x i32> @llvm.genx.rdregioni.v8i32.v16i32.i16(<16 x i32> [[CAST]], i32 0, i32 8, i32 2, i16 0
; CHECK: [[HI_SPLIT:%[^ ]+]] = call <8 x i32> @llvm.genx.rdregioni.v8i32.v16i32.i16(<16 x i32> [[CAST]], i32 0, i32 8, i32 2, i16 4
; CHECK: [[LO_REV:%[^ ]+]] = call <8 x i32> @llvm.genx.bfrev.v8i32(<8 x i32> [[LO_SPLIT]])
; CHECK: [[HI_REV:%[^ ]+]] = call <8 x i32> @llvm.genx.bfrev.v8i32(<8 x i32> [[HI_SPLIT]])
; CHECK: [[LO_JOIN:%[^ ]+]] = call <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.i1(<16 x i32> undef, <8 x i32> [[HI_REV]], i32 0, i32 8, i32 2, i16 0
; CHECK: [[JOIN:%[^ ]+]] = call <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.i1(<16 x i32> [[LO_JOIN]], <8 x i32> [[LO_REV]], i32 0, i32 8, i32 2, i16 4
; CHECK: [[RET:%[^ ]+]] = bitcast <16 x i32> [[JOIN]] to <8 x i64>
; CHECK: ret <8 x i64> [[RET]]
  %ret = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %input);
  ret <8 x i64> %ret
}
88+
5289
declare i32 @llvm.usub.sat.i32(i32 %0, i32 %1)
5390
declare i32 @llvm.uadd.sat.i32(i32 %0, i32 %1)
5491

@@ -69,7 +106,6 @@ define i32 @test_uadd_sat(i32 %a, i32 %b)
69106
ret i32 %res
70107
}
71108

72-
; i64 - currently llvm.bitreverse with bitsize bigger than 32 is not supported
73109
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
74110
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
75111

@@ -83,3 +119,40 @@ define internal spir_func void @cttz_ctlz_int(<8 x i32> %arg) {
83119
ret void
84120
}
85121

122+
declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1)
123+
124+
; Vector <8 x i64> llvm.cttz is expected to become a 64-bit bit reversal
; followed by llvm.ctlz. The reversal is done per dword, and the reversed
; halves are exchanged on write-back (reversed high dwords at byte offset 0,
; reversed low dwords at byte offset 4) so that trailing zeros of the input
; become leading zeros of the value passed to ctlz.
; CHECK-LABEL: cttz_vec64
define internal spir_func <8 x i64> @cttz_vec64(<8 x i64> %arg) {
; CHECK: [[CAST32:%.*]] = bitcast <8 x i64> %arg to <16 x i32>
; CHECK: [[LO_SPLIT:%.*]] = call <8 x i32> @llvm.genx.rdregioni.v8i32.v16i32.i16(<16 x i32> [[CAST32]], i32 0, i32 8, i32 2, i16 0,
; CHECK: [[HI_SPLIT:%.*]] = call <8 x i32> @llvm.genx.rdregioni.v8i32.v16i32.i16(<16 x i32> [[CAST32]], i32 0, i32 8, i32 2, i16 4,
; CHECK: [[REV_LO:%.*]] = call <8 x i32> @llvm.genx.bfrev.v8i32(<8 x i32> [[LO_SPLIT]])
; CHECK: [[REV_HI:%.*]] = call <8 x i32> @llvm.genx.bfrev.v8i32(<8 x i32> [[HI_SPLIT]])
; CHECK: [[JOIN1:%.*]] = call <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.i1(<16 x i32> undef, <8 x i32> [[REV_HI]], i32 0, i32 8, i32 2, i16 0
; CHECK: [[JOIN2:%.*]] = call <16 x i32> @llvm.genx.wrregioni.v16i32.v8i32.i16.i1(<16 x i32> [[JOIN1]], <8 x i32> [[REV_LO]], i32 0, i32 8, i32 2, i16 4
; CHECK: [[CAST:%.*]] = bitcast <16 x i32> [[JOIN2]] to <8 x i64>
; CHECK: [[RET:%.*]] = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> [[CAST]], i1 false)
; CHECK: ret <8 x i64> [[RET]]
  %ret = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %arg, i1 false)
  ret <8 x i64> %ret
}
139+
140+
declare i64 @llvm.cttz.i64(i64, i1)
141+
142+
; Scalar i64 llvm.cttz is expected to become a 64-bit bit reversal followed by
; llvm.ctlz. Each dword is reversed with bfrev and the reversed halves are
; exchanged on write-back (reversed high dword at byte offset 0, reversed low
; dword at byte offset 4) so that trailing zeros of the input become leading
; zeros of the value passed to ctlz.
; CHECK-LABEL: cttz_64
define internal spir_func i64 @cttz_64(i64 %arg) {
; CHECK: [[CAST32:%.*]] = bitcast i64 %arg to <2 x i32>
; CHECK: [[LO_SPLIT:%.*]] = call <1 x i32> @llvm.genx.rdregioni.v1i32.v2i32.i16(<2 x i32> [[CAST32]], i32 0, i32 1, i32 2, i16 0
; CHECK: [[HI_SPLIT:%.*]] = call <1 x i32> @llvm.genx.rdregioni.v1i32.v2i32.i16(<2 x i32> [[CAST32]], i32 0, i32 1, i32 2, i16 4
; CHECK: [[REV_LO:%.*]] = call <1 x i32> @llvm.genx.bfrev.v1i32(<1 x i32> [[LO_SPLIT]])
; CHECK: [[REV_HI:%.*]] = call <1 x i32> @llvm.genx.bfrev.v1i32(<1 x i32> [[HI_SPLIT]])
; CHECK: [[JOIN1:%.*]] = call <2 x i32> @llvm.genx.wrregioni.v2i32.v1i32.i16.i1(<2 x i32> undef, <1 x i32> [[REV_HI]], i32 0, i32 1, i32 2, i16 0
; CHECK: [[JOIN2:%.*]] = call <2 x i32> @llvm.genx.wrregioni.v2i32.v1i32.i16.i1(<2 x i32> [[JOIN1]], <1 x i32> [[REV_LO]], i32 0, i32 1, i32 2, i16 4
; CHECK: [[CAST_V:%.*]] = bitcast <2 x i32> [[JOIN2]] to <1 x i64>
; CHECK: [[CAST:%.*]] = bitcast <1 x i64> [[CAST_V]] to i64
; CHECK: [[RET:%.*]] = call i64 @llvm.ctlz.i64(i64 [[CAST]], i1 false)
; CHECK: ret i64 [[RET]]
  %ret = call i64 @llvm.cttz.i64(i64 %arg, i1 false)
  ret i64 %ret
}
158+

0 commit comments

Comments
 (0)