Skip to content

Commit 25e9938

Browse files
committed
GlobalISel: Handle more cases of G_SEXT narrowing
G_SEXT narrowing now has the same limitation that G_ZEXT/G_ANYEXT already have: the requested narrow type is assumed to be the source type. This will be fixed separately by creating intermediate merges.
1 parent daab922 commit 25e9938

File tree

2 files changed

+62
-44
lines changed

2 files changed

+62
-44
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -657,49 +657,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
657657
MI.eraseFromParent();
658658
return Legalized;
659659
}
660-
case TargetOpcode::G_SEXT: {
661-
if (TypeIdx != 0)
662-
return UnableToLegalize;
663-
664-
Register SrcReg = MI.getOperand(1).getReg();
665-
LLT SrcTy = MRI.getType(SrcReg);
666-
667-
// FIXME: support the general case where the requested NarrowTy may not be
668-
// the same as the source type. E.g. s128 = sext(s32)
669-
if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
670-
SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
671-
LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
672-
return UnableToLegalize;
673-
}
674-
675-
// Shift the sign bit of the low register through the high register.
676-
auto ShiftAmt =
677-
MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
678-
auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
679-
MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
680-
MI.eraseFromParent();
681-
return Legalized;
682-
}
660+
case TargetOpcode::G_SEXT:
683661
case TargetOpcode::G_ZEXT:
684662
case TargetOpcode::G_ANYEXT: {
685663
if (TypeIdx != 0)
686664
return UnableToLegalize;
687665

688-
LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
666+
Register SrcReg = MI.getOperand(1).getReg();
667+
LLT SrcTy = MRI.getType(SrcReg);
689668
uint64_t SizeOp1 = SrcTy.getSizeInBits();
690669
if (SizeOp0 % SizeOp1 != 0)
691670
return UnableToLegalize;
692671

693672
Register PadReg;
694673
if (MI.getOpcode() == TargetOpcode::G_ZEXT)
695674
PadReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
696-
else
675+
else if (MI.getOpcode() == TargetOpcode::G_ANYEXT)
697676
PadReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
677+
else {
678+
// Shift the sign bit of the low register through the high register.
679+
auto ShiftAmt =
680+
MIRBuilder.buildConstant(LLT::scalar(64), SrcTy.getSizeInBits() - 1);
681+
PadReg = MIRBuilder.buildAShr(SrcTy, SrcReg, ShiftAmt).getReg(0);
682+
}
698683

699684
// Generate a merge where the bottom bits are taken from the source, and
700-
// zero/impdef everything else.
685+
// zero/impdef/sign bit everything else.
701686
unsigned NumParts = SizeOp0 / SizeOp1;
702-
SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
687+
SmallVector<Register, 4> Srcs = {SrcReg};
703688
for (unsigned Part = 1; Part < NumParts; ++Part)
704689
Srcs.push_back(PadReg);
705690
MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir

Lines changed: 50 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
2+
# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
33

44
---
55
name: test_sext_s32_to_s64
@@ -336,8 +336,10 @@ body: |
336336
337337
; CHECK-LABEL: name: test_sext_s32_to_s128
338338
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
339-
; CHECK: [[SEXT:%[0-9]+]]:_(s128) = G_SEXT [[COPY]](s32)
340-
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s128)
339+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
340+
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
341+
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32)
342+
; CHECK: S_ENDPGM 0, implicit [[MV]](s128)
341343
%0:_(s32) = COPY $vgpr0
342344
%1:_(s128) = G_SEXT %0
343345
S_ENDPGM 0, implicit %1
@@ -351,8 +353,10 @@ body: |
351353
352354
; CHECK-LABEL: name: test_sext_s32_to_s256
353355
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
354-
; CHECK: [[SEXT:%[0-9]+]]:_(s256) = G_SEXT [[COPY]](s32)
355-
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s256)
356+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
357+
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
358+
; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32)
359+
; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
356360
%0:_(s32) = COPY $vgpr0
357361
%1:_(s256) = G_SEXT %0
358362
S_ENDPGM 0, implicit %1
@@ -366,8 +370,10 @@ body: |
366370
367371
; CHECK-LABEL: name: test_sext_s32_to_s512
368372
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
369-
; CHECK: [[SEXT:%[0-9]+]]:_(s512) = G_SEXT [[COPY]](s32)
370-
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s512)
373+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
374+
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
375+
; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32)
376+
; CHECK: S_ENDPGM 0, implicit [[MV]](s512)
371377
%0:_(s32) = COPY $vgpr0
372378
%1:_(s512) = G_SEXT %0
373379
S_ENDPGM 0, implicit %1
@@ -381,8 +387,10 @@ body: |
381387
382388
; CHECK-LABEL: name: test_sext_s32_to_s1024
383389
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
384-
; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[COPY]](s32)
385-
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s1024)
390+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
391+
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
392+
; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32)
393+
; CHECK: S_ENDPGM 0, implicit [[MV]](s1024)
386394
%0:_(s32) = COPY $vgpr0
387395
%1:_(s1024) = G_SEXT %0
388396
S_ENDPGM 0, implicit %1
@@ -413,8 +421,10 @@ body: |
413421
414422
; CHECK-LABEL: name: test_sext_s64_to_s256
415423
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
416-
; CHECK: [[SEXT:%[0-9]+]]:_(s256) = G_SEXT [[COPY]](s64)
417-
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s256)
424+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
425+
; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
426+
; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
427+
; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
418428
%0:_(s64) = COPY $vgpr0_vgpr1
419429
%1:_(s256) = G_SEXT %0
420430
S_ENDPGM 0, implicit %1
@@ -428,8 +438,10 @@ body: |
428438
429439
; CHECK-LABEL: name: test_sext_s64_to_s512
430440
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
431-
; CHECK: [[SEXT:%[0-9]+]]:_(s512) = G_SEXT [[COPY]](s64)
432-
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s512)
441+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
442+
; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
443+
; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
444+
; CHECK: S_ENDPGM 0, implicit [[MV]](s512)
433445
%0:_(s64) = COPY $vgpr0_vgpr1
434446
%1:_(s512) = G_SEXT %0
435447
S_ENDPGM 0, implicit %1
@@ -443,8 +455,10 @@ body: |
443455
444456
; CHECK-LABEL: name: test_sext_s64_to_s1024
445457
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
446-
; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[COPY]](s64)
447-
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s1024)
458+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
459+
; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32)
460+
; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64), [[ASHR]](s64)
461+
; CHECK: S_ENDPGM 0, implicit [[MV]](s1024)
448462
%0:_(s64) = COPY $vgpr0_vgpr1
449463
%1:_(s1024) = G_SEXT %0
450464
S_ENDPGM 0, implicit %1
@@ -469,8 +483,27 @@ body: |
469483
470484
; CHECK-LABEL: name: test_sext_s128_to_s256
471485
; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
472-
; CHECK: [[SEXT:%[0-9]+]]:_(s256) = G_SEXT [[COPY]](s128)
473-
; CHECK: S_ENDPGM 0, implicit [[SEXT]](s256)
486+
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
487+
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
488+
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
489+
; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[C1]]
490+
; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[C]]
491+
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
492+
; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[C]](s32), [[C1]]
493+
; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[C]](s32), [[C2]]
494+
; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32)
495+
; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32)
496+
; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32)
497+
; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
498+
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
499+
; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C3]](s32)
500+
; CHECK: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32)
501+
; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]]
502+
; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]]
503+
; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]]
504+
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64)
505+
; CHECK: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s128), [[MV]](s128)
506+
; CHECK: S_ENDPGM 0, implicit [[MV1]](s256)
474507
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
475508
%1:_(s256) = G_SEXT %0
476509
S_ENDPGM 0, implicit %1

0 commit comments

Comments
 (0)