|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -stop-after=finalize-isel -simplify-mir -o - %s | FileCheck %s |
| 3 | + |
| 4 | +; Make sure the disjoint flag is preserved on 'or' instructions through instruction selection. |
| 5 | + |
| | +; Scalar 32-bit case: both operands arrive in SGPRs and the selected S_OR_B32 |
| | +; must still carry the disjoint flag. The result vreg is captured as a FileCheck |
| | +; variable (not matched by number) so the test does not depend on vreg numbering. |
| 6 | +define amdgpu_ps i32 @s_or_i32_disjoint(i32 inreg %a, i32 inreg %b) { |
| 7 | + ; CHECK-LABEL: name: s_or_i32_disjoint |
| 8 | + ; CHECK: bb.0 (%ir-block.0): |
| 9 | + ; CHECK-NEXT: liveins: $sgpr0, $sgpr1 |
| 10 | + ; CHECK-NEXT: {{ $}} |
| 11 | + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1 |
| 12 | + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 |
| 13 | + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY1]], [[COPY]], implicit-def dead $scc |
| 14 | + ; CHECK-NEXT: $sgpr0 = COPY [[S_OR_B32_]] |
| 15 | + ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0 |
| 16 | + %result = or disjoint i32 %a, %b |
| 17 | + ret i32 %result |
| 18 | +} |
| 19 | + |
| | +; Scalar <2 x i32> case: the checks expect one S_OR_B32 per element, and each |
| | +; of them must carry the disjoint flag. Result vregs are captured as FileCheck |
| | +; variables so the test does not depend on vreg numbering. |
| 20 | +define amdgpu_ps <2 x i32> @s_or_v2i32_disjoint(<2 x i32> inreg %a, <2 x i32> inreg %b) { |
| 21 | + ; CHECK-LABEL: name: s_or_v2i32_disjoint |
| 22 | + ; CHECK: bb.0 (%ir-block.0): |
| 23 | + ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 |
| 24 | + ; CHECK-NEXT: {{ $}} |
| 25 | + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3 |
| 26 | + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2 |
| 27 | + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 |
| 28 | + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 |
| 29 | + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY3]], [[COPY1]], implicit-def dead $scc |
| 30 | + ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = disjoint S_OR_B32 [[COPY2]], [[COPY]], implicit-def dead $scc |
| 31 | + ; CHECK-NEXT: $sgpr0 = COPY [[S_OR_B32_]] |
| 32 | + ; CHECK-NEXT: $sgpr1 = COPY [[S_OR_B32_1]] |
| 33 | + ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 |
| 34 | + %result = or disjoint <2 x i32> %a, %b |
| 35 | + ret <2 x i32> %result |
| 36 | +} |
| 37 | + |
| | +; Vector-register 32-bit case: operands arrive in VGPRs and the selected |
| | +; V_OR_B32_e64 must still carry the disjoint flag. The result vreg is captured |
| | +; as a FileCheck variable so the test does not depend on vreg numbering. |
| 38 | +define i32 @v_or_i32_disjoint(i32 %a, i32 %b) { |
| 39 | + ; CHECK-LABEL: name: v_or_i32_disjoint |
| 40 | + ; CHECK: bb.0 (%ir-block.0): |
| 41 | + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 |
| 42 | + ; CHECK-NEXT: {{ $}} |
| 43 | + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| 44 | + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| 45 | + ; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY1]], [[COPY]], implicit $exec |
| 46 | + ; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] |
| 47 | + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 |
| 48 | + %result = or disjoint i32 %a, %b |
| 49 | + ret i32 %result |
| 50 | +} |
| 51 | + |
| | +; Vector-register <2 x i32> case: the checks expect one V_OR_B32_e64 per |
| | +; element, and each of them must carry the disjoint flag. Result vregs are |
| | +; captured as FileCheck variables so the test does not depend on vreg numbering. |
| 52 | +define <2 x i32> @v_or_v2i32_disjoint(<2 x i32> %a, <2 x i32> %b) { |
| 53 | + ; CHECK-LABEL: name: v_or_v2i32_disjoint |
| 54 | + ; CHECK: bb.0 (%ir-block.0): |
| 55 | + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 |
| 56 | + ; CHECK-NEXT: {{ $}} |
| 57 | + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 |
| 58 | + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 |
| 59 | + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| 60 | + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| 61 | + ; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY3]], [[COPY1]], implicit $exec |
| 62 | + ; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 [[COPY2]], [[COPY]], implicit $exec |
| 63 | + ; CHECK-NEXT: $vgpr0 = COPY [[V_OR_B32_e64_]] |
| 64 | + ; CHECK-NEXT: $vgpr1 = COPY [[V_OR_B32_e64_1]] |
| 65 | + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 |
| 66 | + %result = or disjoint <2 x i32> %a, %b |
| 67 | + ret <2 x i32> %result |
| 68 | +} |
| 69 | + |
| | +; Scalar 64-bit case: the operands are assembled into 64-bit SGPR pairs with |
| | +; REG_SEQUENCE and a single S_OR_B64 must carry the disjoint flag. The result |
| | +; vreg (and its sub0/sub1 uses) is captured as a FileCheck variable so the test |
| | +; does not depend on vreg numbering. |
| 70 | +define amdgpu_ps i64 @s_or_i64_disjoint(i64 inreg %a, i64 inreg %b) { |
| 71 | + ; CHECK-LABEL: name: s_or_i64_disjoint |
| 72 | + ; CHECK: bb.0 (%ir-block.0): |
| 73 | + ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 |
| 74 | + ; CHECK-NEXT: {{ $}} |
| 75 | + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3 |
| 76 | + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2 |
| 77 | + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1 |
| 78 | + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0 |
| 79 | + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 |
| 80 | + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 |
| 81 | + ; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = disjoint S_OR_B64 killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], implicit-def dead $scc |
| 82 | + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub1 |
| 83 | + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_OR_B64_]].sub0 |
| 84 | + ; CHECK-NEXT: $sgpr0 = COPY [[COPY5]] |
| 85 | + ; CHECK-NEXT: $sgpr1 = COPY [[COPY4]] |
| 86 | + ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 |
| 87 | + %result = or disjoint i64 %a, %b |
| 88 | + ret i64 %result |
| 89 | +} |
| 90 | + |
| | +; Vector-register 64-bit case: the checks below expect the or to be split into |
| | +; two V_OR_B32_e64 instructions operating on the sub0 and sub1 halves of the |
| | +; operands, with the results recombined by REG_SEQUENCE. |
| | +; NOTE(review): unlike the other functions in this file, neither V_OR_B32_e64 |
| | +; here carries the disjoint flag. Presumably the flag is lost when the 64-bit |
| | +; or is split; confirm whether that is intended or a known gap. |
| 91 | +define i64 @v_or_i64_disjoint(i64 %a, i64 %b) { |
| 92 | + ; CHECK-LABEL: name: v_or_i64_disjoint |
| 93 | + ; CHECK: bb.0 (%ir-block.0): |
| 94 | + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 |
| 95 | + ; CHECK-NEXT: {{ $}} |
| 96 | + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 |
| 97 | + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 |
| 98 | + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 |
| 99 | + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 |
| 100 | + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF |
| 101 | + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF |
| 102 | + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 |
| 103 | + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF |
| 104 | + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF |
| 105 | + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 |
| 106 | + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 |
| 107 | + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 |
| 108 | + ; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[COPY5]], killed [[COPY4]], implicit $exec |
| 109 | + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 |
| 110 | + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 |
| 111 | + ; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed [[COPY7]], killed [[COPY6]], implicit $exec |
| 112 | + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_OR_B32_e64_1]], %subreg.sub0, killed [[V_OR_B32_e64_]], %subreg.sub1 |
| 113 | + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub1 |
| 114 | + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE2]].sub0 |
| 115 | + ; CHECK-NEXT: $vgpr0 = COPY [[COPY9]] |
| 116 | + ; CHECK-NEXT: $vgpr1 = COPY [[COPY8]] |
| 117 | + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 |
| 118 | + %result = or disjoint i64 %a, %b |
| 119 | + ret i64 %result |
| 120 | +} |
0 commit comments