Skip to content

Commit 0d88f66

Browse files
committed
GlobalISel: ComputeNumSignBits from load range metadata
GlobalISel is still missing SimplifyDemandedBits-style optimizations, so one test case differs from the SelectionDAG output because the constant is not trimmed. The other differing case is an optimization GlobalISel performs that the DAG does not: splitting the 64-bit shift. https://reviews.llvm.org/D138082
1 parent 7032076 commit 0d88f66

File tree

3 files changed

+87
-16
lines changed

3 files changed

+87
-16
lines changed

llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,11 @@ class GAnyLoad : public GLoadStore {
184184
/// Get the definition register of the loaded value.
185185
Register getDstReg() const { return getOperand(0).getReg(); }
186186

187+
/// Returns the range metadata node recorded on this load's memory operand,
/// i.e. whatever getMMO().getRanges() reports. Callers should be prepared
/// for a null result (the sign-bit helper in GISelKnownBits.cpp checks for it).
188+
const MDNode *getRanges() const {
189+
return getMMO().getRanges();
190+
}
191+
187192
static bool classof(const MachineInstr *MI) {
188193
switch (MI->getOpcode()) {
189194
case TargetOpcode::G_LOAD:

llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
1414
#include "llvm/ADT/StringExtras.h"
1515
#include "llvm/Analysis/ValueTracking.h"
16+
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
1617
#include "llvm/CodeGen/GlobalISel/Utils.h"
1718
#include "llvm/CodeGen/MachineFrameInfo.h"
1819
#include "llvm/CodeGen/MachineRegisterInfo.h"
1920
#include "llvm/CodeGen/TargetLowering.h"
2021
#include "llvm/CodeGen/TargetOpcodes.h"
22+
#include "llvm/IR/ConstantRange.h"
2123
#include "llvm/IR/Module.h"
2224
#include "llvm/Target/TargetMachine.h"
2325

@@ -624,6 +626,33 @@ unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
624626
return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
625627
}
626628

629+
/// Compute the number of known sign bits of a load result from the range
630+
/// metadata attached to its memory operand. For G_SEXTLOAD / G_ZEXTLOAD the
631+
/// range is first sign-/zero-extended to \p TyBits so the high bits count.
///
/// \param Ld     Load whose getRanges() metadata is consulted.
/// \param TyBits Bit width the range is widened to for extending loads.
/// \returns 1 when no range metadata is attached; otherwise the smaller of
///          the sign-bit counts of the range's signed minimum and maximum.
632+
static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
633+
unsigned TyBits) {
634+
const MDNode *Ranges = Ld->getRanges();
635+
if (!Ranges)
636+
// No range metadata: return 1. The G_SEXTLOAD / G_ZEXTLOAD callers treat a
// result of 1 as "nothing learned" and fall back to their own estimate.
return 1;
637+
638+
ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
639+
// Only widen when the requested width exceeds the width of the metadata
// range; the kind of extension follows the load opcode.
if (TyBits > CR.getBitWidth()) {
640+
switch (Ld->getOpcode()) {
641+
case TargetOpcode::G_SEXTLOAD:
642+
CR = CR.signExtend(TyBits);
643+
break;
644+
case TargetOpcode::G_ZEXTLOAD:
645+
CR = CR.zeroExtend(TyBits);
646+
break;
647+
default:
648+
// NOTE(review): for any other opcode (e.g. a plain G_LOAD) CR keeps its
// metadata width. If TyBits can exceed that width on this path, the count
// returned below is relative to the narrower width -- confirm callers
// rule this out.
break;
649+
}
650+
}
651+
652+
// Every value in the range has at least as many sign bits as the worse of
// its two signed extremes.
return std::min(CR.getSignedMin().getNumSignBits(),
653+
CR.getSignedMax().getNumSignBits());
654+
}
655+
627656
unsigned GISelKnownBits::computeNumSignBits(Register R,
628657
const APInt &DemandedElts,
629658
unsigned Depth) {
@@ -675,20 +704,39 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
675704
unsigned InRegBits = TyBits - SrcBits + 1;
676705
return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
677706
}
707+
case TargetOpcode::G_LOAD: {
708+
GLoad *Ld = cast<GLoad>(&MI);
709+
if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
710+
break;
711+
712+
return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
713+
}
678714
case TargetOpcode::G_SEXTLOAD: {
715+
GSExtLoad *Ld = cast<GSExtLoad>(&MI);
716+
679717
// FIXME: We need an in-memory type representation.
680718
if (DstTy.isVector())
681719
return 1;
682720

721+
unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
722+
if (NumBits != 1)
723+
return NumBits;
724+
683725
// e.g. i16->i32 = '17' bits known.
684726
const MachineMemOperand *MMO = *MI.memoperands_begin();
685727
return TyBits - MMO->getSizeInBits().getValue() + 1;
686728
}
687729
case TargetOpcode::G_ZEXTLOAD: {
730+
GZExtLoad *Ld = cast<GZExtLoad>(&MI);
731+
688732
// FIXME: We need an in-memory type representation.
689733
if (DstTy.isVector())
690734
return 1;
691735

736+
unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
737+
if (NumBits != 1)
738+
return NumBits;
739+
692740
// e.g. i16->i32 = '16' bits known.
693741
const MachineMemOperand *MMO = *MI.memoperands_begin();
694742
return TyBits - MMO->getSizeInBits().getValue();

llvm/test/CodeGen/AMDGPU/load-range-metadata-sign-bits.ll

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SDAG %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GISEL %s
34

45
define i32 @range_metadata_sext_i8_signed_range_i32(ptr addrspace(1) %ptr) {
56
; GCN-LABEL: range_metadata_sext_i8_signed_range_i32:
@@ -43,13 +44,21 @@ define i32 @range_metadata_sext_lower_range_limited_i32(ptr addrspace(1) %ptr) {
4344
}
4445

4546
define i32 @range_metadata_sext_i8_neg_neg_range_i32(ptr addrspace(1) %ptr) {
46-
; GCN-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
47-
; GCN: ; %bb.0:
48-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49-
; GCN-NEXT: global_load_dword v0, v[0:1], off glc
50-
; GCN-NEXT: s_waitcnt vmcnt(0)
51-
; GCN-NEXT: v_and_b32_e32 v0, 63, v0
52-
; GCN-NEXT: s_setpc_b64 s[30:31]
47+
; SDAG-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
48+
; SDAG: ; %bb.0:
49+
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50+
; SDAG-NEXT: global_load_dword v0, v[0:1], off glc
51+
; SDAG-NEXT: s_waitcnt vmcnt(0)
52+
; SDAG-NEXT: v_and_b32_e32 v0, 63, v0
53+
; SDAG-NEXT: s_setpc_b64 s[30:31]
54+
;
55+
; GISEL-LABEL: range_metadata_sext_i8_neg_neg_range_i32:
56+
; GISEL: ; %bb.0:
57+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58+
; GISEL-NEXT: global_load_dword v0, v[0:1], off glc
59+
; GISEL-NEXT: s_waitcnt vmcnt(0)
60+
; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0
61+
; GISEL-NEXT: s_setpc_b64 s[30:31]
5362
%val = load volatile i32, ptr addrspace(1) %ptr, align 4, !range !3, !noundef !{}
5463
%shl = shl i32 %val, 25
5564
%ashr = ashr i32 %shl, 25
@@ -98,14 +107,23 @@ define i32 @range_metadata_i32_neg1_to_1(ptr addrspace(1) %ptr) {
98107
}
99108

100109
define i64 @range_metadata_sext_i8_signed_range_i64(ptr addrspace(1) %ptr) {
101-
; GCN-LABEL: range_metadata_sext_i8_signed_range_i64:
102-
; GCN: ; %bb.0:
103-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104-
; GCN-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
105-
; GCN-NEXT: s_waitcnt vmcnt(0)
106-
; GCN-NEXT: v_lshlrev_b32_e32 v1, 23, v0
107-
; GCN-NEXT: v_ashrrev_i64 v[0:1], 55, v[0:1]
108-
; GCN-NEXT: s_setpc_b64 s[30:31]
110+
; SDAG-LABEL: range_metadata_sext_i8_signed_range_i64:
111+
; SDAG: ; %bb.0:
112+
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113+
; SDAG-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
114+
; SDAG-NEXT: s_waitcnt vmcnt(0)
115+
; SDAG-NEXT: v_lshlrev_b32_e32 v1, 23, v0
116+
; SDAG-NEXT: v_ashrrev_i64 v[0:1], 55, v[0:1]
117+
; SDAG-NEXT: s_setpc_b64 s[30:31]
118+
;
119+
; GISEL-LABEL: range_metadata_sext_i8_signed_range_i64:
120+
; GISEL: ; %bb.0:
121+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122+
; GISEL-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
123+
; GISEL-NEXT: s_waitcnt vmcnt(0)
124+
; GISEL-NEXT: v_bfe_i32 v0, v0, 0, 9
125+
; GISEL-NEXT: v_ashrrev_i32_e32 v1, 31, v0
126+
; GISEL-NEXT: s_setpc_b64 s[30:31]
109127
%val = load volatile i64, ptr addrspace(1) %ptr, align 4, !range !7, !noundef !{}
110128
%shl = shl i64 %val, 55
111129
%ashr = ashr i64 %shl, 55

0 commit comments

Comments
 (0)