Skip to content

Commit 295bbea

Browse files
committed
AMDGPU/GlobalISel: Fix non-power-of-2 G_SITOFP/G_UITOFP
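Sources with non-power-of-2 widths between s33 and s63 were not handled: clampScalar(1, S32, S64) leaves them unchanged, so also widen the source type to the next power of 2.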
1 parent 24c1561 commit 295bbea

File tree

3 files changed

+180
-1
lines changed

3 files changed

+180
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
494494
if (ST.has16BitInsts())
495495
IToFP.legalFor({{S16, S16}});
496496
IToFP.clampScalar(1, S32, S64)
497-
.scalarize(0);
497+
.scalarize(0)
498+
.widenScalarToNextPow2(1);
498499

499500
auto &FPToI = getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
500501
.legalFor({{S32, S32}, {S32, S64}, {S32, S16}})

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,3 +443,103 @@ body: |
443443
%2:_(s64) = G_SITOFP %1
444444
$vgpr0_vgpr1 = COPY %2
445445
...
446+
447+
---
448+
name: test_sitofp_s33_to_s32
449+
body: |
450+
bb.0:
451+
liveins: $vgpr0_vgpr1
452+
453+
; GFX6-LABEL: name: test_sitofp_s33_to_s32
454+
; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
455+
; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
456+
; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
457+
; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
458+
; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
459+
; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
460+
; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
461+
; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
462+
; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
463+
; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
464+
; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
465+
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
466+
; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
467+
; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64)
468+
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
469+
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]]
470+
; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]]
471+
; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]]
472+
; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
473+
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32)
474+
; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]]
475+
; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
476+
; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]]
477+
; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
478+
; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32)
479+
; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
480+
; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32)
481+
; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
482+
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]]
483+
; GFX6: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
484+
; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]]
485+
; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]]
486+
; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
487+
; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
488+
; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]]
489+
; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]]
490+
; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
491+
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR]](s64)
492+
; GFX6: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UITOFP]]
493+
; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]]
494+
; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[UITOFP]]
495+
; GFX6: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s64)
496+
; GFX6: $vgpr0 = COPY [[SITOFP]](s32)
497+
; GFX8-LABEL: name: test_sitofp_s33_to_s32
498+
; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
499+
; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
500+
; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 33
501+
; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63
502+
; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32)
503+
; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64)
504+
; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64)
505+
; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
506+
; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
507+
; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
508+
; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]]
509+
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
510+
; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
511+
; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64)
512+
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
513+
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]]
514+
; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]]
515+
; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]]
516+
; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
517+
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32)
518+
; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]]
519+
; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
520+
; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]]
521+
; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
522+
; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32)
523+
; GFX8: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
524+
; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32)
525+
; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
526+
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]]
527+
; GFX8: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
528+
; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]]
529+
; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]]
530+
; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
531+
; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
532+
; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]]
533+
; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]]
534+
; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
535+
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR]](s64)
536+
; GFX8: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[UITOFP]]
537+
; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]]
538+
; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[UITOFP]]
539+
; GFX8: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[SEXT_INREG]](s64)
540+
; GFX8: $vgpr0 = COPY [[SITOFP]](s32)
541+
%0:_(s64) = COPY $vgpr0_vgpr1
542+
%1:_(s33) = G_TRUNC %0
543+
%2:_(s32) = G_SITOFP %1
544+
$vgpr0 = COPY %2
545+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,3 +405,81 @@ body: |
405405
%2:_(s64) = G_UITOFP %1
406406
$vgpr0_vgpr1 = COPY %2
407407
...
408+
409+
---
410+
name: test_uitofp_s33_to_s32
411+
body: |
412+
bb.0:
413+
liveins: $vgpr0_vgpr1
414+
415+
; GFX6-LABEL: name: test_uitofp_s33_to_s32
416+
; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
417+
; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
418+
; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
419+
; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
420+
; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
421+
; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
422+
; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64)
423+
; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
424+
; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]]
425+
; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s64), [[C2]]
426+
; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]]
427+
; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
428+
; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[CTLZ_ZERO_UNDEF]](s32)
429+
; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]]
430+
; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
431+
; GFX6: [[AND2:%[0-9]+]]:_(s64) = G_AND [[AND1]], [[C5]]
432+
; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
433+
; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C6]](s32)
434+
; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
435+
; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32)
436+
; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
437+
; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]]
438+
; GFX6: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
439+
; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND2]](s64), [[C8]]
440+
; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s64), [[C8]]
441+
; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
442+
; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
443+
; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND3]], [[C1]]
444+
; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]]
445+
; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
446+
; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64)
447+
; GFX6: $vgpr0 = COPY [[UITOFP]](s32)
448+
; GFX8-LABEL: name: test_uitofp_s33_to_s32
449+
; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
450+
; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
451+
; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
452+
; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
453+
; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
454+
; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
455+
; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64)
456+
; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190
457+
; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]]
458+
; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND]](s64), [[C2]]
459+
; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]]
460+
; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
461+
; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[CTLZ_ZERO_UNDEF]](s32)
462+
; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]]
463+
; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775
464+
; GFX8: [[AND2:%[0-9]+]]:_(s64) = G_AND [[AND1]], [[C5]]
465+
; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
466+
; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C6]](s32)
467+
; GFX8: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
468+
; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32)
469+
; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64)
470+
; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]]
471+
; GFX8: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888
472+
; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND2]](s64), [[C8]]
473+
; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s64), [[C8]]
474+
; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
475+
; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]]
476+
; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND3]], [[C1]]
477+
; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]]
478+
; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]]
479+
; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s64)
480+
; GFX8: $vgpr0 = COPY [[UITOFP]](s32)
481+
%0:_(s64) = COPY $vgpr0_vgpr1
482+
%1:_(s33) = G_TRUNC %0
483+
%2:_(s32) = G_UITOFP %1
484+
$vgpr0 = COPY %2
485+
...

0 commit comments

Comments
 (0)