GlobalISel: Drop vector range metadata on bitcast lowering #97279

Merged
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -331,6 +331,9 @@ class MachineMemOperand {
MemoryType = NewTy;
}

/// Unset the tracked range metadata.
void clearRanges() { Ranges = nullptr; }

/// Support for operator<<.
/// @{
void print(raw_ostream &OS, ModuleSlotTracker &MST,
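The new mutator complements the existing setType() and getRanges() members. A minimal sketch of the intended usage pattern, illustrative only (the real call site is the LegalizerHelper hunk below):

// Reinterpreting a memory operand's type invalidates any attached
// !range metadata, which described values of the old type.
MMO.setType(NewTy);  // change the memory type
MMO.clearRanges();   // drop the now-stale range metadata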
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3639,6 +3639,9 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
Observer.changingInstr(MI);
bitcastDst(MI, CastTy, 0);
MMO.setType(CastTy);
// The range metadata is no longer valid when reinterpreted as a different
// type.
MMO.clearRanges();
Observer.changedInstr(MI);
return Legalized;
}
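Why the metadata becomes invalid: as an illustrative example (not taken from this patch), a <4 x i8> load tagged with the range [0, 10) promises that each i8 lane lies in that interval; once the legalizer bitcasts the access into a single s32 load, no scalar range over the s32 value expresses the same per-lane guarantee, so the only safe option is to drop the metadata rather than reinterpret it.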
150 changes: 150 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll
@@ -0,0 +1,150 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -stop-after=legalizer -o - %s | FileCheck %s

; Test the legalizer's handling of vector loads that carry range metadata
; and are lowered by bitcasting to a scalar integer: the range no longer
; applies to the reinterpreted type, so the metadata must be dropped.

define <4 x i8> @global_load_v4i8_align4__rangemd(ptr addrspace(1) %ptr) {
; CHECK-LABEL: name: global_load_v4i8_align4__rangemd
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s32) from %ir.ptr, addrspace 1)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C2]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[LSHR2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%load = load <4 x i8>, ptr addrspace(1) %ptr, align 4, !range !0, !noundef !1
ret <4 x i8> %load
}

; This one is widened as well (<3 x i8> is loaded as a full s32), and the
; range metadata is likewise dropped.
define <3 x i8> @global_load_v3i8_align4__rangemd(ptr addrspace(1) %ptr) {
; CHECK-LABEL: name: global_load_v3i8_align4__rangemd
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s32) from %ir.ptr, addrspace 1)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C1]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[LSHR1]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%load = load <3 x i8>, ptr addrspace(1) %ptr, align 4, !range !0, !noundef !1
ret <3 x i8> %load
}

define <2 x i8> @global_load_v2i8_align2__rangemd(ptr addrspace(1) %ptr) {
; CHECK-LABEL: name: global_load_v2i8_align2__rangemd
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p1) :: (load (s16) from %ir.ptr, addrspace 1)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[LSHR]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%load = load <2 x i8>, ptr addrspace(1) %ptr, align 2, !range !0, !noundef !1
ret <2 x i8> %load
}

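; No bitcast is involved here: the value is loaded directly as <2 x s64>, so
; the range metadata survives (note the !range !2 on the G_LOAD below).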
define <2 x i64> @global_load_v2i64_align16__rangemd(ptr addrspace(1) %ptr) {
; CHECK-LABEL: name: global_load_v2i64_align16__rangemd
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !2, addrspace 1)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%load = load <2 x i64>, ptr addrspace(1) %ptr, align 16, !range !2, !noundef !1
ret <2 x i64> %load
}

; This goes the other direction: the scalar i128 load is legalized as a
; vector (<4 x s32>) load, and the range metadata is dropped just the same.
define i128 @global_load_i128_align16__rangemd(ptr addrspace(1) %ptr) {
; CHECK-LABEL: name: global_load_i128_align16__rangemd
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p1) :: (load (<4 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s128)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%load = load i128, ptr addrspace(1) %ptr, align 16, !range !3, !noundef !1
ret i128 %load
}

; The load is sign extended; the range still describes the value loaded from
; memory, so the metadata is kept on the narrow s8 access.
define i32 @global_sextload_i8_align1__rangemd(ptr addrspace(1) %ptr) {
; CHECK-LABEL: name: global_sextload_i8_align1__rangemd
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !0, addrspace 1)
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%load = load i8, ptr addrspace(1) %ptr, align 1, !range !0, !noundef !1
%ext = sext i8 %load to i32
ret i32 %ext
}
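; Worked example for the test above: !0 = !{i8 -32, i8 64} constrains the
; loaded i8 to the half-open interval [-32, 64); sign extending any such
; value to i32 yields a value in the same interval, so keeping the !range on
; the s8 load remains accurate.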

define i32 @global_zextload_i8_align1__rangemd(ptr addrspace(1) %ptr) {
; CHECK-LABEL: name: global_zextload_i8_align1__rangemd
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !4, addrspace 1)
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%load = load i8, ptr addrspace(1) %ptr, align 1, !range !4, !noundef !1
%ext = sext i8 %load to i32
ret i32 %ext
}

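; !range pairs denote half-open intervals [lo, hi); !1 is the empty node
; used for the !noundef attachment.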
!0 = !{i8 -32, i8 64}
!1 = !{}
!2 = !{i64 -2048, i64 1024}
!3 = !{i128 -2048, i128 1024}
!4 = !{i8 8, i8 64}