
[AMDGPU][GISel] Use datalayout alignment for buffer-load legalization #95578


Merged (4 commits) on Jun 15, 2024
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -585,5 +585,9 @@ bool isGuaranteedNotToBePoison(Register Reg, const MachineRegisterInfo &MRI,
bool isGuaranteedNotToBeUndef(Register Reg, const MachineRegisterInfo &MRI,
unsigned Depth = 0);

/// Get the type back from LLT. It won't be 100 percent accurate but returns an
/// estimate of the type.
Type *getTypeForLLT(LLT Ty, LLVMContext &C);

} // End namespace llvm.
#endif
7 changes: 0 additions & 7 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1099,13 +1099,6 @@ void CombinerHelper::applySextInRegOfLoad(
MI.eraseFromParent();
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
Ty.getNumElements());
return IntegerType::get(C, Ty.getSizeInBits());
}

/// Return true if 'MI' is a load or a store that may fold its address
/// operand into the load / store addressing mode.
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
9 changes: 0 additions & 9 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8392,8 +8392,6 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
return UnableToLegalize;
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C);

LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
MachineFunction &MF = *MI.getMF();
const DataLayout &DL = MIRBuilder.getDataLayout();
@@ -8517,13 +8515,6 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
return true;
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
Ty.getNumElements());
return IntegerType::get(C, Ty.getSizeInBits());
}

// Get a vectorized representation of the memset value operand, GISel edition.
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
MachineRegisterInfo &MRI = *MIB.getMRI();
7 changes: 7 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1906,3 +1906,10 @@ bool llvm::isGuaranteedNotToBeUndef(Register Reg,
return ::isGuaranteedNotToBeUndefOrPoison(Reg, MRI, Depth,
UndefPoisonKind::UndefOnly);
}

Type *llvm::getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
Ty.getElementCount());
return IntegerType::get(C, Ty.getSizeInBits());
}
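
As a reading aid (not part of the patch): the hoisted helper reconstructs an IR type from an LLT so that DataLayout queries become possible. A minimal sketch of how it maps values, assuming the usual LLT factory functions; the function name and header paths below are the ones used by this patch, the rest is illustrative.

```cpp
// Sketch only: illustrates the mapping performed by the hoisted helper.
#include "llvm/CodeGen/GlobalISel/Utils.h"   // getTypeForLLT
#include "llvm/CodeGenTypes/LowLevelType.h"  // LLT (header path assumed)
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static void getTypeForLLTExamples() {
  LLVMContext Ctx;

  // Scalars become integer types of the same width: s96 -> i96.
  Type *ScalarTy = getTypeForLLT(LLT::scalar(96), Ctx);

  // Vectors become integer vectors with the same element count, now built
  // via VectorType::get + getElementCount(), so scalable vectors are
  // handled as well: <3 x s32> -> <3 x i32>.
  Type *VecTy = getTypeForLLT(LLT::fixed_vector(3, 32), Ctx);

  (void)ScalarTy;
  (void)VecTy;
}
```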
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6630,9 +6630,9 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(LegalizerHelper &Helper,
MI.removeOperand(1); // Remove intrinsic ID

// FIXME: When intrinsic definition is fixed, this should have an MMO already.
// TODO: Should this use datalayout alignment?
const unsigned MemSize = (Size + 7) / 8;
const Align MemAlign(std::min(MemSize, 4u));
const Align MemAlign = B.getDataLayout().getABITypeAlign(
getTypeForLLT(Ty, MF.getFunction().getContext()));
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
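
Side note on the hunk above (a sketch of the effect, not additional patch content): the MMO alignment for s_buffer_load legalization is now taken from the datalayout's ABI alignment of the type reconstructed via getTypeForLLT, rather than being capped at min(size, 4). For a <3 x s32> load that means the ABI alignment of <3 x i32>, 16 bytes under the AMDGPU datalayout, which is what drives the test updates below from `align 4` to `align 16` (and to `align 32` for the s192 cases).

```cpp
// Before (removed): alignment never exceeded 4 bytes.
const unsigned MemSize = (Size + 7) / 8;      // e.g. 12 for an s96 load
const Align OldAlign(std::min(MemSize, 4u));  // -> align 4

// After (added): ABI alignment of the IR type for the loaded LLT.
// For Ty = <3 x s32>, getTypeForLLT gives <3 x i32>, whose ABI alignment
// under the AMDGPU datalayout is 16.
const Align MemAlign = B.getDataLayout().getABITypeAlign(
    getTypeForLLT(Ty, MF.getFunction().getContext()));
```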
7 changes: 0 additions & 7 deletions llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -462,13 +462,6 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
getLegacyLegalizerInfo().computeTables();
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
Ty.getNumElements());
return IntegerType::get(C, Ty.getSizeInBits());
}

bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
@@ -34,7 +34,7 @@ body: |
; GFX67-NEXT: {{ $}}
; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
@@ -44,7 +44,7 @@ body: |
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0
@@ -64,7 +64,7 @@ body: |
; GFX67-NEXT: {{ $}}
; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
@@ -75,7 +75,7 @@ body: |
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -96,7 +96,7 @@ body: |
; GFX67-NEXT: {{ $}}
; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
@@ -107,7 +107,7 @@ body: |
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -128,7 +128,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4)
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 32)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s32>)
@@ -150,7 +150,7 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4)
; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 32)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64)
; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>)
@@ -172,7 +172,7 @@ body: |
; GFX67-NEXT: {{ $}}
; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GFX67-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GFX67-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
@@ -219,7 +219,7 @@ body: |
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
@@ -272,7 +272,7 @@ body: |
; GFX67-NEXT: {{ $}}
; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)

Review comment (Contributor): We might be setting the wrong alignment field, I thought the MMO didn't print the align when it was the same as the size

Reply (Collaborator, Author): I noticed that too. I'll take a look and can post a separate PR if any changes needed.

; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
@@ -282,7 +282,7 @@ body: |
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:_(s32) = G_CONSTANT i32 0