llvm
diff --git a/‎llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Lines changed: 19 additions & 28 deletions b/‎llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Lines changed: 19 additions & 28 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
Lines changed: 142 additions & 0 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
Lines changed: 142 additions & 0 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir
Lines changed: 13 additions & 10 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.div.fmas.mir
Lines changed: 13 additions & 10 deletions
diff --git a/‎llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir
Lines changed: 12 additions & 7 deletions b/‎llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir
Lines changed: 12 additions & 7 deletions
@@ -572,27 +572,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
     return AltMappings;
 
   }
-  case TargetOpcode::G_ICMP: {
-    // TODO: Should report 32-bit for scalar output type.
-    unsigned Size = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
-    const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
-      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
-                          nullptr, // Predicate operand.
-                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
-                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
-      4); // Num Operands
-    AltMappings.push_back(&SSMapping);
-
-    const InstructionMapping &VVMapping = getInstructionMapping(4, 1,
-      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
-                          nullptr, // Predicate operand.
-                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
-                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size)}),
-      4); // Num Operands
-    AltMappings.push_back(&VVMapping);
-
-    return AltMappings;
-  }
   case TargetOpcode::G_SELECT: {
     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
     const InstructionMapping &SSMapping = getInstructionMapping(1, 1,
@@ -1832,7 +1811,13 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
 
     MachineBasicBlock *MBB = MI.getParent();
     B.setInsertPt(*MBB, std::next(MI.getIterator()));
-    B.buildTrunc(DstReg, NewDstReg);
+
+    // If we had a constrained VCC result register, a copy was inserted to VCC
+    // from SGPR.
+    SmallVector<Register, 1> DefRegs(OpdMapper.getVRegs(0));
+    if (DefRegs.empty())
+      DefRegs.push_back(DstReg);
+    B.buildTrunc(DefRegs[0], NewDstReg);
     return;
   }
   case AMDGPU::G_SELECT: {
@@ -3287,25 +3272,31 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case AMDGPU::G_ICMP: {
     auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
     unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+
+    // See if the result register has already been constrained to vcc, which may
+    // happen due to control flow intrinsic lowering.
+    unsigned DstBank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
+                                    AMDGPU::SGPRRegBankID);
     unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI);
     unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI);
 
-    bool CanUseSCC = Op2Bank == AMDGPU::SGPRRegBankID &&
+    bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
+                     Op2Bank == AMDGPU::SGPRRegBankID &&
                      Op3Bank == AMDGPU::SGPRRegBankID &&
       (Size == 32 || (Size == 64 &&
                       (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
                       Subtarget.hasScalarCompareEq64()));
 
-    unsigned Op0Bank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
+    DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
+    unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
 
     // TODO: Use 32-bit for scalar output size.
     // SCC results will need to be copied to a 32-bit SGPR virtual register.
     const unsigned ResultSize = 1;
 
-    OpdsMapping[0] = AMDGPU::getValueMapping(Op0Bank, ResultSize);
-    OpdsMapping[1] = nullptr; // Predicate Operand.
-    OpdsMapping[2] = AMDGPU::getValueMapping(Op2Bank, Size);
-    OpdsMapping[3] = AMDGPU::getValueMapping(Op3Bank, Size);
+    OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
+    OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);
+    OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);
     break;
   }
   case AMDGPU::G_EXTRACT_VECTOR_ELT: {
 
@@ -56,3 +56,145 @@ if.true:
   %val = load volatile i32, i32 addrspace(1)* undef
   br label %endif
 }
+
+; Make sure and 1 is inserted on llvm.amdgcn.if
+define i32 @divergent_if_nonboolean_condition0(i32 %value) {
+; CHECK-LABEL: divergent_if_nonboolean_condition0:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    ; implicit-def: $vgpr0
+; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT:    s_cbranch_execz BB2_2
+; CHECK-NEXT:  ; %bb.1: ; %if.true
+; CHECK-NEXT:    global_load_dword v0, v[0:1], off
+; CHECK-NEXT:  BB2_2: ; %endif
+; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %c = trunc i32 %value to i1
+  br i1 %c, label %if.true, label %endif
+
+if.true:
+  %val = load volatile i32, i32 addrspace(1)* undef
+  br label %endif
+
+endif:
+  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
+  ret i32 %v
+}
+
+; Make sure and 1 is inserted on llvm.amdgcn.if
+define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) {
+; CHECK-LABEL: divergent_if_nonboolean_condition1:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    global_load_dword v0, v[0:1], off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; CHECK-NEXT:    ; implicit-def: $vgpr0
+; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT:    s_cbranch_execz BB3_2
+; CHECK-NEXT:  ; %bb.1: ; %if.true
+; CHECK-NEXT:    global_load_dword v0, v[0:1], off
+; CHECK-NEXT:  BB3_2: ; %endif
+; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %value = load i32, i32 addrspace(1)* %ptr
+  %c = trunc i32 %value to i1
+  br i1 %c, label %if.true, label %endif
+
+if.true:
+  %val = load volatile i32, i32 addrspace(1)* undef
+  br label %endif
+
+endif:
+  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
+  ret i32 %v
+}
+
+@external_constant = external addrspace(4) constant i32, align 4
+@const.ptr = external addrspace(4) constant float*, align 4
+
+; Make sure this case compiles. G_ICMP was mis-mapped due to having
+; the result register class constrained by llvm.amdgcn.if lowering.
+define void @constrained_if_register_class() {
+; CHECK-LABEL: constrained_if_register_class:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_getpc_b64 s[4:5]
+; CHECK-NEXT:    s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+4
+; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_load_dword s6, s[4:5], 0x0
+; CHECK-NEXT:    s_getpc_b64 s[4:5]
+; CHECK-NEXT:    s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+4
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_cmp_lg_u32 s6, 0
+; CHECK-NEXT:    s_cselect_b32 s6, 1, 0
+; CHECK-NEXT:    s_and_b32 s6, s6, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s6, 0
+; CHECK-NEXT:    s_cbranch_scc1 BB4_6
+; CHECK-NEXT:  ; %bb.1: ; %bb2
+; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
+; CHECK-NEXT:    s_mov_b32 s4, -1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, s6
+; CHECK-NEXT:    v_mov_b32_e32 v1, s7
+; CHECK-NEXT:    flat_load_dword v0, v[0:1]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, 1
+; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, 1.0, v0
+; CHECK-NEXT:    s_xor_b64 s[8:9], vcc, s[6:7]
+; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[8:9]
+; CHECK-NEXT:  ; %bb.2: ; %bb7
+; CHECK-NEXT:    s_mov_b32 s4, 0
+; CHECK-NEXT:  ; %bb.3: ; %bb8
+; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[6:7], s4, 0
+; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_cbranch_execz BB4_5
+; CHECK-NEXT:  ; %bb.4: ; %bb11
+; CHECK-NEXT:    v_mov_b32_e32 v0, 4.0
+; CHECK-NEXT:    buffer_store_dword v0, v0, s[0:3], s33 offen
+; CHECK-NEXT:  BB4_5: ; %Flow
+; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT:  BB4_6: ; %bb12
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %tmp = load i32, i32 addrspace(4)* @external_constant
+  %ptr = load float*, float* addrspace(4)* @const.ptr
+  %tmp1 = icmp ne i32 %tmp, 0
+  br i1 %tmp1, label %bb12, label %bb2
+
+bb2:
+  %tmp4 = load float, float* %ptr, align 4
+  %tmp5 = fcmp olt float %tmp4, 1.0
+  %tmp6 = or i1 %tmp5, false
+  br i1 %tmp6, label %bb8, label %bb7
+
+bb7:
+  br label %bb8
+
+bb8:
+  %tmp9 = phi i32 [ 0, %bb7 ], [ -1, %bb2 ]
+  %tmp10 = icmp eq i32 %tmp9, 0
+  br i1 %tmp10, label %bb11, label %bb12
+
+bb11:
+  store float 4.0, float addrspace(5)* undef, align 4
+  br label %bb12
+
+bb12:
+  ret void
+}
@@ -44,11 +44,12 @@ body: |
     ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]]
-    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
-    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[ICMP]](s1)
+    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]]
+    ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[ICMP]](s1)
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $sgpr1
     %2:_(s32) = COPY $sgpr2
@@ -71,10 +72,11 @@ body: |
     ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]]
-    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
-    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY4]](s32), [[COPY5]](s32), [[ICMP]](s1)
+    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]]
+    ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
+    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY5]](s32), [[COPY6]](s32), [[ICMP]](s1)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $sgpr0
     %2:_(s32) = COPY $sgpr1
@@ -97,7 +99,8 @@ body: |
     ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
     ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]]
+    ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[COPY4]]
     ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[ICMP]](s1)
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
 
@@ -86,8 +86,10 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]]
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
     ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
@@ -157,9 +159,10 @@ body: |
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
     ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
     ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
-    ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
-    ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
-    ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP1]]
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY2]]
+    ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY3]], [[ICMP1]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = COPY $vgpr0
     %2:_(s32) = G_CONSTANT i32 0
@@ -179,8 +182,10 @@ body: |
     ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
     ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
     ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
-    ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]]
+    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY2]]
+    ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]]
     ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
     %0:_(s32) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1