Skip to content

Commit 11cdda0

Browse files
OutOfCache authored and zahiraam committed
[AMDGPU] - Add constant folding for s_bitreplicate (llvm#72366)
If the input is a constant, we can constant fold the s_bitreplicate operation.
1 parent f082412 commit 11cdda0

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1533,6 +1533,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
15331533
case Intrinsic::amdgcn_perm:
15341534
case Intrinsic::amdgcn_wave_reduce_umin:
15351535
case Intrinsic::amdgcn_wave_reduce_umax:
1536+
case Intrinsic::amdgcn_s_bitreplicate:
15361537
case Intrinsic::arm_mve_vctp8:
15371538
case Intrinsic::arm_mve_vctp16:
15381539
case Intrinsic::arm_mve_vctp32:
@@ -2422,6 +2423,18 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
24222423

24232424
return ConstantFP::get(Ty->getContext(), Val);
24242425
}
2426+
2427+
case Intrinsic::amdgcn_s_bitreplicate: {
2428+
uint64_t Val = Op->getZExtValue();
2429+
Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;
2430+
Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;
2431+
Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;
2432+
Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;
2433+
Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;
2434+
Val = Val | Val << 1;
2435+
return ConstantInt::get(Ty, Val);
2436+
}
2437+
24252438
default:
24262439
return nullptr;
24272440
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@ define i64 @test_s_bitreplicate_constant() {
88
; GFX11-LABEL: test_s_bitreplicate_constant:
99
; GFX11: ; %bb.0: ; %entry
1010
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11-
; GFX11-NEXT: s_bitreplicate_b64_b32 s[0:1], 0x85fe3a92
12-
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
11+
; GFX11-NEXT: v_mov_b32_e32 v0, 0xfccc30c
12+
; GFX11-NEXT: v_mov_b32_e32 v1, 0xc033fffc
1313
; GFX11-NEXT: s_setpc_b64 s[30:31]
1414
entry:
1515
%br = call i64 @llvm.amdgcn.s.bitreplicate(i32 u0x85FE3A92)

0 commit comments

Comments
 (0)