Skip to content

Commit 8fa9b4e

Browse files
LU-JOHN authored and sys-ce-bb committed
Add support for lowering bitreverse for 2/4-bit types (#2481)
Add support for lowering of llvm.bitreverse.i2/4.
Signed-off-by: Lu, John <[email protected]>
Original commit: KhronosGroup/SPIRV-LLVM-Translator@3dd108b9001af50
1 parent b06a51d commit 8fa9b4e

File tree

4 files changed

+327
-67
lines changed

4 files changed

+327
-67
lines changed

llvm-spirv/lib/SPIRV/LLVMBitreverse.h

Lines changed: 89 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
// from the C code in LLVMIntrinsicEmulation/bitreverse.c with a custom clang
3838
// that was modified to disable llvm.bitreverse.* intrinsic generation.
3939
//
40+
// A similar command was run on LLVMIntrinsicEmulation/small_bitreverse.c to
41+
// produce functions to reverse 2-bit and 4-bit types.
42+
//
4043
// Manual modification was done to avoid coercing vector types into scalar
4144
// types. For example, the original LLVM IR:
4245
//
@@ -59,8 +62,70 @@
5962
// ...
6063
// ret <4 x i8> %or12
6164
// }
65+
// GEN_CONST<N>(BASE_TYPE, VAL) expands to a string literal spelling a constant
// operand in LLVM IR text, where <N> is the element count (1, 2, 3, 4, 8, 16).
//   - GEN_CONST1 is the scalar form: it yields only the stringified VAL and
//     does not use BASE_TYPE.
//   - The other forms yield a vector constant "<T V,T V,...>" in which the
//     pair "BASE_TYPE VAL" is repeated N times.
// The MAKE_BITREVERSE_* macros pick a variant by token-pasting their NUM_ELTS
// argument onto GEN_CONST.
#define GEN_CONST1(BASE_TYPE, VAL) #VAL
66+
#define GEN_CONST2(BASE_TYPE, VAL) \
67+
"<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL ">"
68+
#define GEN_CONST3(BASE_TYPE, VAL) \
69+
"<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL ">"
70+
#define GEN_CONST4(BASE_TYPE, VAL) \
71+
"<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL \
72+
"," #BASE_TYPE " " #VAL ">"
73+
#define GEN_CONST8(BASE_TYPE, VAL) \
74+
"<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL \
75+
"," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL \
76+
"," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL ">"
77+
#define GEN_CONST16(BASE_TYPE, VAL) \
78+
"<" #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL \
79+
"," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL \
80+
"," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL \
81+
"," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL \
82+
"," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL "," #BASE_TYPE " " #VAL \
83+
"," #BASE_TYPE " " #VAL ">"
84+
85+
// clang-format off
86+
// MAKE_BITREVERSE_2BIT(SUFFIX, TYPE_STR, NUM_ELTS, BASE_TYPE) defines a static
// character array named LLVMBitreverse<SUFFIX> that holds the LLVM IR text of
// a function @llvm_bitreverse_<SUFFIX> taking and returning TYPE_STR.
//   SUFFIX    - pasted into the array name and stringified into the IR
//               function name.
//   TYPE_STR  - string literal naming the IR type (scalar or vector).
//   NUM_ELTS  - pasted onto GEN_CONST to select the constant-operand helper.
//   BASE_TYPE - element type forwarded to that helper.
// The generated IR shifts %A left by one, shifts %A logically right by one,
// and ORs the two results, which exchanges the two bits of each 2-bit element.
// The instantiations below cover i2 and its 2-, 3-, 4-, 8- and 16-element
// vectors.
#define MAKE_BITREVERSE_2BIT(SUFFIX,TYPE_STR,NUM_ELTS,BASE_TYPE) \
87+
static const char LLVMBitreverse ## SUFFIX[]{" \n\
88+
define " TYPE_STR " @llvm_bitreverse_" #SUFFIX "(" TYPE_STR " %A) { \n\
89+
entry: \n\
90+
%and = shl " TYPE_STR " %A, " GEN_CONST ## NUM_ELTS(BASE_TYPE,1) " \n\
91+
%shr4 = lshr " TYPE_STR " %A, " GEN_CONST ## NUM_ELTS(BASE_TYPE,1) " \n\
92+
%or = or disjoint " TYPE_STR " %and, %shr4 \n\
93+
ret " TYPE_STR " %or \n\
94+
} \n\
95+
"}
96+
97+
MAKE_BITREVERSE_2BIT(i2, "i2", 1, i2);
98+
MAKE_BITREVERSE_2BIT(v2i2, "<2 x i2>", 2, i2);
99+
MAKE_BITREVERSE_2BIT(v3i2, "<3 x i2>", 3, i2);
100+
MAKE_BITREVERSE_2BIT(v4i2, "<4 x i2>", 4, i2);
101+
MAKE_BITREVERSE_2BIT(v8i2, "<8 x i2>", 8, i2);
102+
MAKE_BITREVERSE_2BIT(v16i2, "<16 x i2>", 16, i2);
103+
104+
// MAKE_BITREVERSE_4BIT(SUFFIX, TYPE_STR, NUM_ELTS, BASE_TYPE) defines a static
// character array named LLVMBitreverse<SUFFIX> that holds the LLVM IR text of
// a function @llvm_bitreverse_<SUFFIX> taking and returning TYPE_STR.
//   SUFFIX    - pasted into the array name and stringified into the IR
//               function name.
//   TYPE_STR  - string literal naming the IR type (scalar or vector).
//   NUM_ELTS  - pasted onto GEN_CONST to select the constant-operand helper.
//   BASE_TYPE - element type forwarded to that helper.
// The generated IR works in two stages:
//   1. Shift %A left by 2 and logically right by 2, then OR the results,
//      exchanging the upper and lower 2-bit halves of each element.
//   2. Shift that value left by 1 and mask with -6 (bit pattern 1010 in i4),
//      shift it logically right by 1 and mask with 5 (bit pattern 0101), then
//      OR the results, exchanging the adjacent bits within each half.
// The instantiations below cover i4 and its 2-, 3-, 4-, 8- and 16-element
// vectors.
#define MAKE_BITREVERSE_4BIT(SUFFIX,TYPE_STR,NUM_ELTS,BASE_TYPE) \
105+
static const char LLVMBitreverse ## SUFFIX[]{" \n\
106+
define " TYPE_STR " @llvm_bitreverse_" #SUFFIX "(" TYPE_STR " %A) { \n\
107+
entry: \n\
108+
%and = shl " TYPE_STR " %A, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 2) " \n\
109+
%shr = lshr " TYPE_STR " %A, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 2) " \n\
110+
%or = or disjoint " TYPE_STR " %and, %shr \n\
111+
%and2 = shl " TYPE_STR " %or, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 1) " \n\
112+
%shl3 = and " TYPE_STR " %and2, " GEN_CONST ## NUM_ELTS(BASE_TYPE,-6) " \n\
113+
%shr4 = lshr " TYPE_STR " %or, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 1) " \n\
114+
%and5 = and " TYPE_STR " %shr4, " GEN_CONST ## NUM_ELTS(BASE_TYPE, 5) " \n\
115+
%or6 = or disjoint " TYPE_STR " %shl3, %and5 \n\
116+
ret " TYPE_STR " %or6 \n\
117+
} \n\
118+
"}
119+
120+
MAKE_BITREVERSE_4BIT(i4, "i4", 1, i4);
121+
MAKE_BITREVERSE_4BIT(v2i4, "<2 x i4>", 2, i4);
122+
MAKE_BITREVERSE_4BIT(v3i4, "<3 x i4>", 3, i4);
123+
MAKE_BITREVERSE_4BIT(v4i4, "<4 x i4>", 4, i4);
124+
MAKE_BITREVERSE_4BIT(v8i4, "<8 x i4>", 8, i4);
125+
MAKE_BITREVERSE_4BIT(v16i4, "<16 x i4>", 16, i4);
126+
// clang-format on
62127

63-
static const char LLVMBitreverseScalari8[]{R"(
128+
static const char LLVMBitreversei8[]{R"(
64129
define zeroext i8 @llvm_bitreverse_i8(i8 %A) {
65130
entry:
66131
%and = shl i8 %A, 4
@@ -80,7 +145,7 @@ define zeroext i8 @llvm_bitreverse_i8(i8 %A) {
80145
}
81146
)"};
82147

83-
static const char LLVMBitreverseScalari16[]{R"(
148+
static const char LLVMBitreversei16[]{R"(
84149
define zeroext i16 @llvm_bitreverse_i16(i16 %A) {
85150
entry:
86151
%and = shl i16 %A, 8
@@ -105,7 +170,7 @@ define zeroext i16 @llvm_bitreverse_i16(i16 %A) {
105170
}
106171
)"};
107172

108-
static const char LLVMBitreverseScalari32[]{R"(
173+
static const char LLVMBitreversei32[]{R"(
109174
define i32 @llvm_bitreverse_i32(i32 %A) {
110175
entry:
111176
%and = shl i32 %A, 16
@@ -135,7 +200,7 @@ define i32 @llvm_bitreverse_i32(i32 %A) {
135200
}
136201
)"};
137202

138-
static const char LLVMBitreverseScalari64[]{R"(
203+
static const char LLVMBitreversei64[]{R"(
139204
define i64 @llvm_bitreverse_i64(i64 %A) {
140205
entry:
141206
%and = shl i64 %A, 32
@@ -170,7 +235,7 @@ define i64 @llvm_bitreverse_i64(i64 %A) {
170235
}
171236
)"};
172237

173-
static const char LLVMBitreverseV2i8[]{R"(
238+
static const char LLVMBitreversev2i8[]{R"(
174239
define <2 x i8> @llvm_bitreverse_v2i8(<2 x i8> %A) {
175240
entry:
176241
%shl = shl <2 x i8> %A, <i8 4, i8 4>
@@ -190,7 +255,7 @@ define <2 x i8> @llvm_bitreverse_v2i8(<2 x i8> %A) {
190255
}
191256
)"};
192257

193-
static const char LLVMBitreverseV2i16[]{R"(
258+
static const char LLVMBitreversev2i16[]{R"(
194259
define <2 x i16> @llvm_bitreverse_v2i16(<2 x i16> %A) {
195260
entry:
196261
%shl = shl <2 x i16> %A, <i16 8, i16 8>
@@ -215,7 +280,7 @@ define <2 x i16> @llvm_bitreverse_v2i16(<2 x i16> %A) {
215280
}
216281
)"};
217282

218-
static const char LLVMBitreverseV2i32[]{R"(
283+
static const char LLVMBitreversev2i32[]{R"(
219284
define <2 x i32> @llvm_bitreverse_v2i32(<2 x i32> %A) {
220285
entry:
221286
%shl = shl <2 x i32> %A, <i32 16, i32 16>
@@ -245,7 +310,7 @@ define <2 x i32> @llvm_bitreverse_v2i32(<2 x i32> %A) {
245310
}
246311
)"};
247312

248-
static const char LLVMBitreverseV2i64[]{R"(
313+
static const char LLVMBitreversev2i64[]{R"(
249314
define <2 x i64> @llvm_bitreverse_v2i64(<2 x i64> %A) {
250315
entry:
251316
%shl = shl <2 x i64> %A, <i64 32, i64 32>
@@ -280,7 +345,7 @@ define <2 x i64> @llvm_bitreverse_v2i64(<2 x i64> %A) {
280345
}
281346
)"};
282347

283-
static const char LLVMBitreverseV3i8[]{R"(
348+
static const char LLVMBitreversev3i8[]{R"(
284349
define <3 x i8> @llvm_bitreverse_v3i8(<3 x i8> %A) {
285350
entry:
286351
%shl = shl <3 x i8> %A, <i8 4, i8 4, i8 4>
@@ -300,7 +365,7 @@ define <3 x i8> @llvm_bitreverse_v3i8(<3 x i8> %A) {
300365
}
301366
)"};
302367

303-
static const char LLVMBitreverseV3i16[]{R"(
368+
static const char LLVMBitreversev3i16[]{R"(
304369
define <3 x i16> @llvm_bitreverse_v3i16(<3 x i16> %A) {
305370
entry:
306371
%shl = shl <3 x i16> %A, <i16 8, i16 8, i16 8>
@@ -325,7 +390,7 @@ define <3 x i16> @llvm_bitreverse_v3i16(<3 x i16> %A) {
325390
}
326391
)"};
327392

328-
static const char LLVMBitreverseV3i32[]{R"(
393+
static const char LLVMBitreversev3i32[]{R"(
329394
define <3 x i32> @llvm_bitreverse_v3i32(<3 x i32> %A) {
330395
entry:
331396
%shl = shl <3 x i32> %A, <i32 16, i32 16, i32 16>
@@ -355,7 +420,7 @@ define <3 x i32> @llvm_bitreverse_v3i32(<3 x i32> %A) {
355420
}
356421
)"};
357422

358-
static const char LLVMBitreverseV3i64[]{R"(
423+
static const char LLVMBitreversev3i64[]{R"(
359424
define <3 x i64> @llvm_bitreverse_v3i64(<3 x i64> %A) {
360425
entry:
361426
%shl = shl <3 x i64> %A, <i64 32, i64 32, i64 32>
@@ -390,7 +455,7 @@ define <3 x i64> @llvm_bitreverse_v3i64(<3 x i64> %A) {
390455
}
391456
)"};
392457

393-
static const char LLVMBitreverseV4i8[]{R"(
458+
static const char LLVMBitreversev4i8[]{R"(
394459
define <4 x i8> @llvm_bitreverse_v4i8(<4 x i8> %A) {
395460
entry:
396461
%shl = shl <4 x i8> %A, <i8 4, i8 4, i8 4, i8 4>
@@ -410,7 +475,7 @@ define <4 x i8> @llvm_bitreverse_v4i8(<4 x i8> %A) {
410475
}
411476
)"};
412477

413-
static const char LLVMBitreverseV4i16[]{R"(
478+
static const char LLVMBitreversev4i16[]{R"(
414479
define <4 x i16> @llvm_bitreverse_v4i16(<4 x i16> %A) {
415480
entry:
416481
%shl = shl <4 x i16> %A, <i16 8, i16 8, i16 8, i16 8>
@@ -435,7 +500,7 @@ define <4 x i16> @llvm_bitreverse_v4i16(<4 x i16> %A) {
435500
}
436501
)"};
437502

438-
static const char LLVMBitreverseV4i32[]{R"(
503+
static const char LLVMBitreversev4i32[]{R"(
439504
define <4 x i32> @llvm_bitreverse_v4i32(<4 x i32> %A) {
440505
entry:
441506
%shl = shl <4 x i32> %A, <i32 16, i32 16, i32 16, i32 16>
@@ -465,7 +530,7 @@ define <4 x i32> @llvm_bitreverse_v4i32(<4 x i32> %A) {
465530
}
466531
)"};
467532

468-
static const char LLVMBitreverseV4i64[]{R"(
533+
static const char LLVMBitreversev4i64[]{R"(
469534
define <4 x i64> @llvm_bitreverse_v4i64(<4 x i64> %A) {
470535
entry:
471536
%shl = shl <4 x i64> %A, <i64 32, i64 32, i64 32, i64 32>
@@ -500,7 +565,7 @@ define <4 x i64> @llvm_bitreverse_v4i64(<4 x i64> %A) {
500565
}
501566
)"};
502567

503-
static const char LLVMBitreverseV8i8[]{R"(
568+
static const char LLVMBitreversev8i8[]{R"(
504569
define <8 x i8> @llvm_bitreverse_v8i8(<8 x i8> %A) {
505570
entry:
506571
%shl = shl <8 x i8> %A, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -520,7 +585,7 @@ define <8 x i8> @llvm_bitreverse_v8i8(<8 x i8> %A) {
520585
}
521586
)"};
522587

523-
static const char LLVMBitreverseV8i16[]{R"(
588+
static const char LLVMBitreversev8i16[]{R"(
524589
define <8 x i16> @llvm_bitreverse_v8i16(<8 x i16> %A) {
525590
entry:
526591
%shl = shl <8 x i16> %A, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -545,7 +610,7 @@ define <8 x i16> @llvm_bitreverse_v8i16(<8 x i16> %A) {
545610
}
546611
)"};
547612

548-
static const char LLVMBitreverseV8i32[]{R"(
613+
static const char LLVMBitreversev8i32[]{R"(
549614
define <8 x i32> @llvm_bitreverse_v8i32(<8 x i32> %A) {
550615
entry:
551616
%shl = shl <8 x i32> %A, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -575,7 +640,7 @@ define <8 x i32> @llvm_bitreverse_v8i32(<8 x i32> %A) {
575640
}
576641
)"};
577642

578-
static const char LLVMBitreverseV8i64[]{R"(
643+
static const char LLVMBitreversev8i64[]{R"(
579644
define <8 x i64> @llvm_bitreverse_v8i64(<8 x i64> %A) {
580645
entry:
581646
%shl = shl <8 x i64> %A, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
@@ -610,7 +675,7 @@ define <8 x i64> @llvm_bitreverse_v8i64(<8 x i64> %A) {
610675
}
611676
)"};
612677

613-
static const char LLVMBitreverseV16i8[]{R"(
678+
static const char LLVMBitreversev16i8[]{R"(
614679
define <16 x i8> @llvm_bitreverse_v16i8(<16 x i8> %A) {
615680
entry:
616681
%shl = shl <16 x i8> %A, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -630,7 +695,7 @@ define <16 x i8> @llvm_bitreverse_v16i8(<16 x i8> %A) {
630695
}
631696
)"};
632697

633-
static const char LLVMBitreverseV16i16[]{R"(
698+
static const char LLVMBitreversev16i16[]{R"(
634699
define <16 x i16> @llvm_bitreverse_v16i16(<16 x i16> %A) {
635700
entry:
636701
%shl = shl <16 x i16> %A, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -655,7 +720,7 @@ define <16 x i16> @llvm_bitreverse_v16i16(<16 x i16> %A) {
655720
}
656721
)"};
657722

658-
static const char LLVMBitreverseV16i32[]{R"(
723+
static const char LLVMBitreversev16i32[]{R"(
659724
define <16 x i32> @llvm_bitreverse_v16i32(<16 x i32> %A) {
660725
entry:
661726
%shl = shl <16 x i32> %A, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
@@ -685,7 +750,7 @@ define <16 x i32> @llvm_bitreverse_v16i32(<16 x i32> %A) {
685750
}
686751
)"};
687752

688-
static const char LLVMBitreverseV16i64[]{R"(
753+
static const char LLVMBitreversev16i64[]{R"(
689754
define <16 x i64> @llvm_bitreverse_v16i64(<16 x i64> %A) {
690755
entry:
691756
%shl = shl <16 x i64> %A, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// clang-format off
2+
/*
3+
#include <stdlib.h>
4+
5+
#define MASK2LO 0x3333333333333333LLU
6+
#define MASK1LO 0x5555555555555555LLU
7+
8+
#define SWAP2(X,TYPE) (((X&((TYPE) MASK2LO ))<< 2) | (((X)>> 2)&((TYPE) MASK2LO)))
9+
#define SWAP1(X,TYPE) (((X&((TYPE) MASK1LO ))<< 1) | (((X)>> 1)&((TYPE) MASK1LO)))
10+
11+
#define uint2_t _BitInt(2)
12+
#define uint4_t _BitInt(4)
13+
14+
///////////////////////////////////////////////////////////////////////////////////////
15+
// scalar
16+
///////////////////////////////////////////////////////////////////////////////////////
17+
18+
uint2_t llvm_bitreverse_i2(uint2_t A) {
19+
A=SWAP1(A,uint2_t);
20+
return A;
21+
}
22+
23+
uint4_t llvm_bitreverse_i4(uint4_t A) {
24+
A=SWAP2(A,uint4_t);
25+
A=SWAP1(A,uint4_t);
26+
return A;
27+
}
28+
*/
29+
// clang-format on

0 commit comments

Comments
 (0)