Skip to content

Commit 79ce9bb

Browse files
committed
CodeGen: Don't drop AA metadata when splitting MachineMemOperands
Assuming this is used to split a memory access into smaller pieces, the new access should still have the same aliasing properties as the original memory access. As far as I can tell, this wasn't intentionally dropped. It may be necessary to drop this if you are moving the operand outside of the bounds of the original object in such a way that it may alias another IR object, but I don't think any of the existing users are doing this. Some of the uses widen into unused alignment padding, which I think is OK.
1 parent 18b2180 commit 79ce9bb

File tree

4 files changed

+75
-7
lines changed

4 files changed

+75
-7
lines changed

llvm/lib/CodeGen/MachineFunction.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,9 +492,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
492492
? commonAlignment(MMO->getBaseAlign(), Offset)
493493
: MMO->getBaseAlign();
494494

495+
// Do not preserve ranges, since we don't necessarily know what the high bits
496+
// are anymore.
495497
return new (Allocator)
496498
MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size,
497-
Alignment, AAMDNodes(), nullptr, MMO->getSyncScopeID(),
499+
Alignment, MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
498500
MMO->getOrdering(), MMO->getFailureOrdering());
499501
}
500502

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,7 +2372,6 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
23722372
if (WideMemSize == ValSize) {
23732373
MachineFunction &MF = B.getMF();
23742374

2375-
// FIXME: This is losing AA metadata
23762375
MachineMemOperand *WideMMO =
23772376
MF.getMachineMemOperand(MMO, 0, WideMemSize / 8);
23782377
Observer.changingInstr(MI);
@@ -2387,7 +2386,6 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
23872386

23882387
LLT WideTy = widenToNextPowerOf2(ValTy);
23892388

2390-
// FIXME: This is losing AA metadata
23912389
Register WideLoad;
23922390
if (!WideTy.isVector()) {
23932391
WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-memory-metadata.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ body: |
4141
liveins: $vgpr0_vgpr1
4242
; SI-LABEL: name: widen_load_range0_tbaa
4343
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
44-
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
44+
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1)
4545
; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
4646
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
4747
; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
@@ -61,7 +61,7 @@ body: |
6161
liveins: $vgpr0_vgpr1
6262
; SI-LABEL: name: widen_load_range1_tbaa
6363
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
64-
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
64+
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1)
6565
; SI: $vgpr0 = COPY [[LOAD]](s32)
6666
%0:_(p1) = COPY $vgpr0_vgpr1
6767
%1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !range !0, !tbaa !1)
@@ -75,7 +75,7 @@ body: |
7575
liveins: $vgpr0_vgpr1
7676
; SI-LABEL: name: widen_load_tbaa0
7777
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
78-
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
78+
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1)
7979
; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215
8080
; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
8181
; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
@@ -95,7 +95,7 @@ body: |
9595
liveins: $vgpr0_vgpr1
9696
; SI-LABEL: name: widen_load_tbaa1
9797
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
98-
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
98+
; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, !tbaa !1, addrspace 1)
9999
; SI: $vgpr0 = COPY [[LOAD]](s32)
100100
%0:_(p1) = COPY $vgpr0_vgpr1
101101
%1:_(s32) = G_LOAD %0 :: (load 3, align 4, addrspace 1, !tbaa !1)
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=regbankselect %s -o - | FileCheck -check-prefix=SI %s
3+
4+
--- |
5+
6+
define amdgpu_ps i96 @split_smrd_load_range(i96 addrspace(4)* %ptr) {
7+
%load = load i96, i96 addrspace(4)* %ptr, !range !0
8+
ret i96 %load
9+
}
10+
11+
define amdgpu_ps <3 x i32> @split_smrd_load_tbaa(<3 x i32> addrspace(4)* %ptr) {
12+
%load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, !tbaa !1
13+
ret <3 x i32> %load
14+
}
15+
16+
!0 = !{i96 0, i96 9223372036854775808}
17+
!1 = !{!"omnipotent char", !2}
18+
!2 = !{!"Simple C/C++ TBAA"}
19+
...
20+
21+
# Make sure range metadata is not preserved when widening loads, but
22+
# tbaa is.
23+
24+
---
25+
name: split_smrd_load_range
26+
legalized: true
27+
body: |
28+
bb.0:
29+
liveins: $sgpr0_sgpr1
30+
31+
; SI-LABEL: name: split_smrd_load_range
32+
; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
33+
; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, addrspace 4)
34+
; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
35+
; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
36+
; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 + 8, align 8, addrspace 4)
37+
; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF
38+
; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
39+
; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
40+
; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>)
41+
%0:_(p4) = COPY $sgpr0_sgpr1
42+
%1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 8, addrspace 4, !range !0)
43+
$sgpr0_sgpr1_sgpr2 = COPY %1
44+
45+
...
46+
47+
---
48+
name: split_smrd_load_tbaa
49+
legalized: true
50+
body: |
51+
bb.0:
52+
liveins: $sgpr0_sgpr1
53+
54+
; SI-LABEL: name: split_smrd_load_tbaa
55+
; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
56+
; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load 8, !tbaa !2, addrspace 4)
57+
; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8
58+
; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
59+
; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 4 + 8, align 8, !tbaa !2, addrspace 4)
60+
; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF
61+
; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
62+
; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
63+
; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>)
64+
%0:_(p4) = COPY $sgpr0_sgpr1
65+
%1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 8, addrspace 4, !tbaa !1)
66+
$sgpr0_sgpr1_sgpr2 = COPY %1
67+
68+
...

0 commit comments

Comments
 (0)