Skip to content

Commit 2d301e6

Browse files
committed
use clamp + add s8 testcases
1 parent 1e02b2d commit 2d301e6

File tree

3 files changed

+76
-6
lines changed

3 files changed

+76
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2013,10 +2013,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
20132013

20142014
// S64 is only legal on SALU, and needs to be broken into 32-bit elements in
20152015
// RegBankSelect.
2016-
auto &SextInReg =
2017-
getActionDefinitionsBuilder(G_SEXT_INREG)
2018-
.legalFor({{S32}, {S64}})
2019-
.widenScalarIf(typeIs(0, S16), widenScalarOrEltToNextPow2(0, 32));
2016+
auto &SextInReg = getActionDefinitionsBuilder(G_SEXT_INREG)
2017+
.legalFor({{S32}, {S64}})
2018+
.clampScalar(0, S32, S64);
20202019

20212020
if (ST.hasVOP3PInsts()) {
20222021
SextInReg.lowerFor({{V2S16}})

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,78 @@ body: |
480480
481481
...
482482

483+
---
# Legalizer test: G_SEXT_INREG with width 1 on an s8 value obtained by
# truncating an s32 copy of $vgpr0. For the GFX9, GFX8 and GFX6 prefixes the
# checks expect the G_SEXT_INREG to be performed on the s32 copy, followed by
# a G_TRUNC of the result back to s8.
484+
name: test_sext_inreg_s8_1
485+
body: |
486+
bb.0:
487+
liveins: $vgpr0
488+
489+
; GFX9-LABEL: name: test_sext_inreg_s8_1
490+
; GFX9: liveins: $vgpr0
491+
; GFX9-NEXT: {{ $}}
492+
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
493+
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
494+
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXT_INREG]](s32)
495+
; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8)
496+
;
497+
; GFX8-LABEL: name: test_sext_inreg_s8_1
498+
; GFX8: liveins: $vgpr0
499+
; GFX8-NEXT: {{ $}}
500+
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
501+
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
502+
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXT_INREG]](s32)
503+
; GFX8-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8)
504+
;
505+
; GFX6-LABEL: name: test_sext_inreg_s8_1
506+
; GFX6: liveins: $vgpr0
507+
; GFX6-NEXT: {{ $}}
508+
; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
509+
; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 1
510+
; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXT_INREG]](s32)
511+
; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8)
512+
%0:_(s32) = COPY $vgpr0
513+
%1:_(s8) = G_TRUNC %0
514+
%2:_(s8) = G_SEXT_INREG %1, 1
515+
S_ENDPGM 0, implicit %2
516+
517+
...
518+
519+
---
# Legalizer test: G_SEXT_INREG with width 7 on an s8 value obtained by
# truncating an s32 copy of $vgpr0. For the GFX9, GFX8 and GFX6 prefixes the
# checks expect the G_SEXT_INREG to be performed on the s32 copy, followed by
# a G_TRUNC of the result back to s8.
# NOTE(review): the test name says "s16", but the input operates on s8 only
# (G_TRUNC to s8, then an s8 G_SEXT_INREG). The name was presumably meant to
# be test_sext_inreg_s8_7 -- confirm; a rename must also update the three
# CHECK-LABEL lines.
520+
name: test_sext_inreg_s16_7
521+
body: |
522+
bb.0:
523+
liveins: $vgpr0
524+
525+
; GFX9-LABEL: name: test_sext_inreg_s16_7
526+
; GFX9: liveins: $vgpr0
527+
; GFX9-NEXT: {{ $}}
528+
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
529+
; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
530+
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXT_INREG]](s32)
531+
; GFX9-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8)
532+
;
533+
; GFX8-LABEL: name: test_sext_inreg_s16_7
534+
; GFX8: liveins: $vgpr0
535+
; GFX8-NEXT: {{ $}}
536+
; GFX8-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
537+
; GFX8-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
538+
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXT_INREG]](s32)
539+
; GFX8-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8)
540+
;
541+
; GFX6-LABEL: name: test_sext_inreg_s16_7
542+
; GFX6: liveins: $vgpr0
543+
; GFX6-NEXT: {{ $}}
544+
; GFX6-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
545+
; GFX6-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7
546+
; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXT_INREG]](s32)
547+
; GFX6-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s8)
548+
%0:_(s32) = COPY $vgpr0
549+
%1:_(s8) = G_TRUNC %0
550+
%2:_(s8) = G_SEXT_INREG %1, 7
551+
S_ENDPGM 0, implicit %2
552+
553+
...
554+
483555
---
484556
name: test_sext_inreg_s96_8
485557
body: |

llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,8 +619,7 @@ define <2 x i16> @v_sext_inreg_v2i16_8(<2 x i16> %value) {
619619
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620620
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff
621621
; GFX8-NEXT: v_and_b32_sdwa v2, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
622-
; GFX8-NEXT: v_and_b32_sdwa v0, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
623-
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
622+
; GFX8-NEXT: v_and_b32_sdwa v0, sext(v0), v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
624623
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
625624
; GFX8-NEXT: s_setpc_b64 s[30:31]
626625
;

0 commit comments

Comments
 (0)