Skip to content

Commit fd11bb6

Browse files
author
git apple-llvm automerger
committed
Merge commit '41507fe595d0' from llvm.org/main into next
2 parents ebd157d + 41507fe commit fd11bb6

File tree

10 files changed

+2425
-2740
lines changed

10 files changed

+2425
-2740
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,10 @@ class CombinerHelper {
196196
/// Match (and (load x), mask) -> zextload x
197197
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo);
198198

199+
/// Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed
200+
/// load.
///
/// The match succeeds only when the vector operand of \p MI is defined by a
/// simple G_LOAD, has a single non-debug use, has a byte-sized element type,
/// and the narrowed element load is legal (or we are before the legalizer)
/// and reported as allowed and fast by the target. On success \p MatchInfo is
/// set to a builder that emits a load of just the extracted element and
/// erases the original vector load.
201+
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo);
202+
199203
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
200204
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
201205

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,12 @@ def sext_inreg_to_zext_inreg : GICombineRule<
259259
}])
260260
>;
261261

262+
// Fold a G_EXTRACT_VECTOR_ELT of a loaded vector into a load of only the
// extracted element. All matching logic lives in
// CombinerHelper::matchCombineExtractedVectorLoad, which records the rewrite
// in $matchinfo; the apply step simply runs that recorded build function.
def combine_extracted_vector_load : GICombineRule<
263+
(defs root:$root, build_fn_matchinfo:$matchinfo),
264+
(match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
265+
[{ return Helper.matchCombineExtractedVectorLoad(*${root}, ${matchinfo}); }]),
266+
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
267+
262268
def combine_indexed_load_store : GICombineRule<
263269
(defs root:$root, indexed_load_store_matchdata:$matchinfo),
264270
(match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
@@ -1291,8 +1297,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
12911297
constant_fold_fp_binop]>;
12921298

12931299
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
1294-
extract_vec_elt_combines, combines_for_extload,
1295-
undef_combines, identity_combines, phi_combines,
1300+
extract_vec_elt_combines, combines_for_extload, combine_extracted_vector_load,
1301+
undef_combines, identity_combines, phi_combines,
12961302
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
12971303
reassocs, ptr_add_immed_chain,
12981304
shl_ashr_to_sext_inreg, sext_inreg_of_load,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,101 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
11651165
return RealUse;
11661166
}
11671167

1168+
/// Match a G_EXTRACT_VECTOR_ELT whose vector operand is produced by a G_LOAD
/// and prepare a rewrite that loads only the extracted element.
///
/// \param MI        The G_EXTRACT_VECTOR_ELT instruction to inspect.
/// \param MatchInfo On success, set to a build function that computes the
///                  element address, emits the narrowed G_LOAD into the
///                  original result register and erases the vector load.
/// \returns true if the combine applies. It is rejected when the vector is
///          not defined by a load, has more than one non-debug use, the load
///          is not simple, the element type is not byte-sized, the narrowed
///          load is not legal (when running after the legalizer), or the
///          target does not report the narrowed access as allowed and fast.
bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
                                                     BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);

  Register Result = MI.getOperand(0).getReg();
  Register Vector = MI.getOperand(1).getReg();
  Register Index = MI.getOperand(2).getReg();

  // Check if there is a load that defines the vector being extracted from.
  auto *LoadMI = getOpcodeDef<GLoad>(Vector, MRI);
  if (!LoadMI)
    return false;

  LLT VecEltTy = MRI.getType(Vector).getElementType();

  // Checked inline so that no assert-only local is left unused when
  // assertions are compiled out.
  assert(MRI.getType(Result) == VecEltTy &&
         "extract result type must match the vector element type");

  // Only narrow the load if this extract is the sole user of the vector;
  // otherwise the wide load has to stay around anyway.
  if (!MRI.hasOneNonDBGUse(Vector))
    return false;

  // Check if the defining load is simple.
  if (!LoadMI->isSimple())
    return false;

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltTy.isByteSized())
    return false;

  // Describe the memory access of the narrowed load. The resulting memory
  // operand is what the legality and target queries below are asked about.
  const MachineMemOperand &MMO = LoadMI->getMMO();
  Align Alignment = MMO.getAlign();
  MachinePointerInfo PtrInfo;
  const uint64_t EltSizeInBytes = VecEltTy.getSizeInBits() / 8;
  uint64_t Offset;

  // If the *index* is a known constant the element lives at a fixed offset
  // from the vector's base address, so the original pointer info can be kept
  // (adjusted by that offset). The vector register itself is defined by the
  // load and can never be a constant, so it must not be queried here.
  if (auto CVal = getIConstantVRegVal(Index, MRI)) {
    const uint64_t Elt = CVal->getZExtValue();
    // FIXME: should be (ABI size)*Elt.
    // NOTE(review): an out-of-range constant index is not filtered here;
    // confirm that describing such an access with this offset is acceptable.
    Offset = EltSizeInBytes * Elt;
    PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    // Every element is at a multiple of the element size, so that size is
    // what bounds the alignment below.
    Offset = EltSizeInBytes;
    PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
  }

  Alignment = commonAlignment(Alignment, Offset);

  Register VecPtr = LoadMI->getPointerReg();
  LLT PtrTy = MRI.getType(VecPtr);
  LLT LoadedVecTy = MRI.getType(LoadMI->getOperand(0).getReg());

  MachineFunction &MF = *MI.getMF();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);

  // Check if the new load that we are going to create is legal
  // if we are in the post-legalization phase.
  LegalityQuery::MemDesc MMDesc(*NewMMO);
  LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
  if (!isLegalOrBeforeLegalizer(Q))
    return false;

  // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  unsigned Fast = 0;
  if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
                                              &Fast) ||
      !Fast)
    return false;

  // Everything the rewrite needs is captured by value, so the closure does
  // not depend on `this`.
  MatchInfo = [=](MachineIRBuilder &B) {
    GISelObserverWrapper DummyObserver;
    LegalizerHelper Helper(B.getMF(), DummyObserver, B);
    // Get pointer to the vector element.
    Register EltPtr =
        Helper.getVectorElementPointer(VecPtr, LoadedVecTy, Index);
    // New G_LOAD instruction.
    B.buildLoad(Result, EltPtr, PtrInfo, Alignment);
    // Remove original GLOAD instruction.
    LoadMI->eraseFromParent();
  };

  return true;
}
1262+
11681263
bool CombinerHelper::matchCombineIndexedLoadStore(
11691264
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
11701265
auto &LdSt = cast<GLoadStore>(MI);

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

Lines changed: 13 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -14659,17 +14659,9 @@ define i8 @load_single_extract_variable_index_i8(ptr %A, i32 %idx) {
1465914659
;
1466014660
; CHECK-GISEL-LABEL: load_single_extract_variable_index_i8:
1466114661
; CHECK-GISEL: ; %bb.0:
14662-
; CHECK-GISEL-NEXT: sub sp, sp, #16
14663-
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
14664-
; CHECK-GISEL-NEXT: mov w9, w1
14665-
; CHECK-GISEL-NEXT: ldr q0, [x0]
14666-
; CHECK-GISEL-NEXT: mov x8, sp
14667-
; CHECK-GISEL-NEXT: and x9, x9, #0xf
14668-
; CHECK-GISEL-NEXT: lsl x10, x9, #1
14669-
; CHECK-GISEL-NEXT: str q0, [sp]
14670-
; CHECK-GISEL-NEXT: sub x9, x10, x9
14671-
; CHECK-GISEL-NEXT: ldrb w0, [x8, x9]
14672-
; CHECK-GISEL-NEXT: add sp, sp, #16
14662+
; CHECK-GISEL-NEXT: mov w8, w1
14663+
; CHECK-GISEL-NEXT: and x8, x8, #0xf
14664+
; CHECK-GISEL-NEXT: ldrb w0, [x0, x8]
1467314665
; CHECK-GISEL-NEXT: ret
1467414666
%lv = load <16 x i8>, ptr %A
1467514667
%e = extractelement <16 x i8> %lv, i32 %idx
@@ -14692,15 +14684,9 @@ define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) {
1469214684
;
1469314685
; CHECK-GISEL-LABEL: load_single_extract_variable_index_i16:
1469414686
; CHECK-GISEL: ; %bb.0:
14695-
; CHECK-GISEL-NEXT: sub sp, sp, #16
14696-
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
14697-
; CHECK-GISEL-NEXT: ldr q0, [x0]
14698-
; CHECK-GISEL-NEXT: mov w9, w1
14699-
; CHECK-GISEL-NEXT: mov x8, sp
14700-
; CHECK-GISEL-NEXT: and x9, x9, #0x7
14701-
; CHECK-GISEL-NEXT: str q0, [sp]
14702-
; CHECK-GISEL-NEXT: ldrh w0, [x8, x9, lsl #1]
14703-
; CHECK-GISEL-NEXT: add sp, sp, #16
14687+
; CHECK-GISEL-NEXT: mov w8, w1
14688+
; CHECK-GISEL-NEXT: and x8, x8, #0x7
14689+
; CHECK-GISEL-NEXT: ldrh w0, [x0, x8, lsl #1]
1470414690
; CHECK-GISEL-NEXT: ret
1470514691
%lv = load <8 x i16>, ptr %A
1470614692
%e = extractelement <8 x i16> %lv, i32 %idx
@@ -14717,15 +14703,9 @@ define i32 @load_single_extract_variable_index_i32(ptr %A, i32 %idx) {
1471714703
;
1471814704
; CHECK-GISEL-LABEL: load_single_extract_variable_index_i32:
1471914705
; CHECK-GISEL: ; %bb.0:
14720-
; CHECK-GISEL-NEXT: sub sp, sp, #16
14721-
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
14722-
; CHECK-GISEL-NEXT: ldr q0, [x0]
14723-
; CHECK-GISEL-NEXT: mov w9, w1
14724-
; CHECK-GISEL-NEXT: mov x8, sp
14725-
; CHECK-GISEL-NEXT: and x9, x9, #0x3
14726-
; CHECK-GISEL-NEXT: str q0, [sp]
14727-
; CHECK-GISEL-NEXT: ldr w0, [x8, x9, lsl #2]
14728-
; CHECK-GISEL-NEXT: add sp, sp, #16
14706+
; CHECK-GISEL-NEXT: mov w8, w1
14707+
; CHECK-GISEL-NEXT: and x8, x8, #0x3
14708+
; CHECK-GISEL-NEXT: ldr w0, [x0, x8, lsl #2]
1472914709
; CHECK-GISEL-NEXT: ret
1473014710
%lv = load <4 x i32>, ptr %A
1473114711
%e = extractelement <4 x i32> %lv, i32 %idx
@@ -14779,14 +14759,8 @@ define i32 @load_single_extract_variable_index_masked_i32(ptr %A, i32 %idx) {
1477914759
;
1478014760
; CHECK-GISEL-LABEL: load_single_extract_variable_index_masked_i32:
1478114761
; CHECK-GISEL: ; %bb.0:
14782-
; CHECK-GISEL-NEXT: sub sp, sp, #16
14783-
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
14784-
; CHECK-GISEL-NEXT: ldr q0, [x0]
14785-
; CHECK-GISEL-NEXT: mov x8, sp
14786-
; CHECK-GISEL-NEXT: and w9, w1, #0x3
14787-
; CHECK-GISEL-NEXT: str q0, [sp]
14788-
; CHECK-GISEL-NEXT: ldr w0, [x8, w9, uxtw #2]
14789-
; CHECK-GISEL-NEXT: add sp, sp, #16
14762+
; CHECK-GISEL-NEXT: and w8, w1, #0x3
14763+
; CHECK-GISEL-NEXT: ldr w0, [x0, w8, uxtw #2]
1479014764
; CHECK-GISEL-NEXT: ret
1479114765
%idx.x = and i32 %idx, 3
1479214766
%lv = load <4 x i32>, ptr %A
@@ -14803,14 +14777,8 @@ define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) {
1480314777
;
1480414778
; CHECK-GISEL-LABEL: load_single_extract_variable_index_masked2_i32:
1480514779
; CHECK-GISEL: ; %bb.0:
14806-
; CHECK-GISEL-NEXT: sub sp, sp, #16
14807-
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
14808-
; CHECK-GISEL-NEXT: ldr q0, [x0]
14809-
; CHECK-GISEL-NEXT: mov x8, sp
14810-
; CHECK-GISEL-NEXT: and w9, w1, #0x1
14811-
; CHECK-GISEL-NEXT: str q0, [sp]
14812-
; CHECK-GISEL-NEXT: ldr w0, [x8, w9, uxtw #2]
14813-
; CHECK-GISEL-NEXT: add sp, sp, #16
14780+
; CHECK-GISEL-NEXT: and w8, w1, #0x1
14781+
; CHECK-GISEL-NEXT: ldr w0, [x0, w8, uxtw #2]
1481414782
; CHECK-GISEL-NEXT: ret
1481514783
%idx.x = and i32 %idx, 1
1481614784
%lv = load <4 x i32>, ptr %A

0 commit comments

Comments
 (0)