Skip to content

Commit 564bd20

Browse files
authored
[AMDGPU][GlobalISel] Save a copy in one case of addrspacecast (#104789)
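Refactor the legalization of addrspacecast from local/private to flat so that, in the known-non-null case, the G_MERGE_VALUES result is built directly into the destination register instead of into a temporary that is then forwarded with an extra COPY.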
1 parent 2258bc4 commit 564bd20

File tree

2 files changed

+16
-13
lines changed

2 files changed

+16
-13
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2368,25 +2368,30 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
23682368
if (DestAS == AMDGPUAS::FLAT_ADDRESS &&
23692369
(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
23702370
SrcAS == AMDGPUAS::PRIVATE_ADDRESS)) {
2371-
Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
2372-
if (!ApertureReg.isValid())
2373-
return false;
2371+
auto castLocalOrPrivateToFlat = [&](const DstOp &Dst) -> Register {
2372+
Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
2373+
if (!ApertureReg.isValid())
2374+
return false;
23742375

2375-
// Coerce the type of the low half of the result so we can use merge_values.
2376-
Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
2376+
// Coerce the type of the low half of the result so we can use
2377+
// merge_values.
2378+
Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
23772379

2378-
// TODO: Should we allow mismatched types but matching sizes in merges to
2379-
// avoid the ptrtoint?
2380-
auto BuildPtr = B.buildMergeLikeInstr(DstTy, {SrcAsInt, ApertureReg});
2380+
// TODO: Should we allow mismatched types but matching sizes in merges to
2381+
// avoid the ptrtoint?
2382+
return B.buildMergeLikeInstr(Dst, {SrcAsInt, ApertureReg}).getReg(0);
2383+
};
23812384

23822385
// For llvm.amdgcn.addrspacecast.nonnull we can always assume non-null, for
23832386
// G_ADDRSPACE_CAST we need to guess.
23842387
if (isa<GIntrinsic>(MI) || isKnownNonNull(Src, MRI, TM, SrcAS)) {
2385-
B.buildCopy(Dst, BuildPtr);
2388+
castLocalOrPrivateToFlat(Dst);
23862389
MI.eraseFromParent();
23872390
return true;
23882391
}
23892392

2393+
Register BuildPtr = castLocalOrPrivateToFlat(DstTy);
2394+
23902395
auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
23912396
auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
23922397

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -509,16 +509,14 @@ body: |
509509
; SIVI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
510510
; SIVI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5)
511511
; SIVI-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
512-
; SIVI-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[MV]](p0)
513-
; SIVI-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](p0)
512+
; SIVI-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
514513
; GFX9-LABEL: name: test_addrspacecast_p5_fi_to_p0
515514
; GFX9: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
516515
; GFX9-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64(s64) = S_MOV_B64 $src_private_base
517516
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[S_MOV_B64_]](s64)
518517
; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[FRAME_INDEX]](p5)
519518
; GFX9-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[UV1]](s32)
520-
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY [[MV]](p0)
521-
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](p0)
519+
; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
522520
%0:_(p5) = G_FRAME_INDEX %stack.0
523521
%1:_(p0) = G_ADDRSPACE_CAST %0
524522
$vgpr0_vgpr1 = COPY %1

0 commit comments

Comments
 (0)