[AMDGPU][True16][CodeGen] uaddsat/usubsat true16 selection in gisel #128233
Conversation
✅ With the latest revision this PR passed the undef deprecator.
@llvm/pr-subscribers-llvm-globalisel Author: Brox Chen (broxigarchen). Changes: Enable gisel selection for uaddsat and usubsat in the true16 flow. This patch includes:
Patch is 95.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128233.diff 14 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index a787c10a9421c..28c5a53508556 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -782,7 +782,7 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
return true;
// TODO: This should probably be a combine somewhere
- // (build_vector $src0, undef) -> copy $src0
+ // (build_vector $src0, undef) -> copy $src0
MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
MI.setDesc(TII.get(AMDGPU::COPY));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 649deee346e90..a6d0f35c4f94e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -223,8 +223,9 @@ static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
};
}
-static bool isRegisterSize(unsigned Size) {
- return Size % 32 == 0 && Size <= MaxRegisterSize;
+static bool isRegisterSize(const GCNSubtarget &ST, unsigned Size) {
+ return ((ST.useRealTrue16Insts() && Size == 16) || Size % 32 == 0) &&
+ Size <= MaxRegisterSize;
}
static bool isRegisterVectorElementType(LLT EltTy) {
@@ -240,8 +241,8 @@ static bool isRegisterVectorType(LLT Ty) {
}
// TODO: replace all uses of isRegisterType with isRegisterClassType
-static bool isRegisterType(LLT Ty) {
- if (!isRegisterSize(Ty.getSizeInBits()))
+static bool isRegisterType(const GCNSubtarget &ST, LLT Ty) {
+ if (!isRegisterSize(ST, Ty.getSizeInBits()))
return false;
if (Ty.isVector())
@@ -252,19 +253,21 @@ static bool isRegisterType(LLT Ty) {
// Any combination of 32 or 64-bit elements up the maximum register size, and
// multiples of v2s16.
-static LegalityPredicate isRegisterType(unsigned TypeIdx) {
- return [=](const LegalityQuery &Query) {
- return isRegisterType(Query.Types[TypeIdx]);
+static LegalityPredicate isRegisterType(const GCNSubtarget &ST,
+ unsigned TypeIdx) {
+ return [=, &ST](const LegalityQuery &Query) {
+ return isRegisterType(ST, Query.Types[TypeIdx]);
};
}
// RegisterType that doesn't have a corresponding RegClass.
// TODO: Once `isRegisterType` is replaced with `isRegisterClassType` this
// should be removed.
-static LegalityPredicate isIllegalRegisterType(unsigned TypeIdx) {
- return [=](const LegalityQuery &Query) {
+static LegalityPredicate isIllegalRegisterType(const GCNSubtarget &ST,
+ unsigned TypeIdx) {
+ return [=, &ST](const LegalityQuery &Query) {
LLT Ty = Query.Types[TypeIdx];
- return isRegisterType(Ty) &&
+ return isRegisterType(ST, Ty) &&
!SIRegisterInfo::getSGPRClassForBitWidth(Ty.getSizeInBits());
};
}
@@ -348,17 +351,20 @@ static std::initializer_list<LLT> AllS64Vectors = {V2S64, V3S64, V4S64, V5S64,
V6S64, V7S64, V8S64, V16S64};
// Checks whether a type is in the list of legal register types.
-static bool isRegisterClassType(LLT Ty) {
+static bool isRegisterClassType(const GCNSubtarget &ST, LLT Ty) {
if (Ty.isPointerOrPointerVector())
Ty = Ty.changeElementType(LLT::scalar(Ty.getScalarSizeInBits()));
return is_contained(AllS32Vectors, Ty) || is_contained(AllS64Vectors, Ty) ||
- is_contained(AllScalarTypes, Ty) || is_contained(AllS16Vectors, Ty);
+ is_contained(AllScalarTypes, Ty) ||
+ (ST.useRealTrue16Insts() && Ty == S16) ||
+ is_contained(AllS16Vectors, Ty);
}
-static LegalityPredicate isRegisterClassType(unsigned TypeIdx) {
- return [TypeIdx](const LegalityQuery &Query) {
- return isRegisterClassType(Query.Types[TypeIdx]);
+static LegalityPredicate isRegisterClassType(const GCNSubtarget &ST,
+ unsigned TypeIdx) {
+ return [&ST, TypeIdx](const LegalityQuery &Query) {
+ return isRegisterClassType(ST, Query.Types[TypeIdx]);
};
}
@@ -510,7 +516,7 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query) {
const LLT Ty = Query.Types[0];
- return isRegisterType(Ty) && isLoadStoreSizeLegal(ST, Query) &&
+ return isRegisterType(ST, Ty) && isLoadStoreSizeLegal(ST, Query) &&
!hasBufferRsrcWorkaround(Ty) && !loadStoreBitcastWorkaround(Ty);
}
@@ -523,12 +529,12 @@ static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
if (Size != MemSizeInBits)
return Size <= 32 && Ty.isVector();
- if (loadStoreBitcastWorkaround(Ty) && isRegisterType(Ty))
+ if (loadStoreBitcastWorkaround(Ty) && isRegisterType(ST, Ty))
return true;
// Don't try to handle bitcasting vector ext loads for now.
return Ty.isVector() && (!MemTy.isVector() || MemTy == Ty) &&
- (Size <= 32 || isRegisterSize(Size)) &&
+ (Size <= 32 || isRegisterSize(ST, Size)) &&
!isRegisterVectorElementType(Ty.getElementType());
}
@@ -875,7 +881,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_BITCAST)
// Don't worry about the size constraint.
- .legalIf(all(isRegisterClassType(0), isRegisterClassType(1)))
+ .legalIf(all(isRegisterClassType(ST, 0), isRegisterClassType(ST, 1)))
.lower();
getActionDefinitionsBuilder(G_CONSTANT)
@@ -890,7 +896,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S16, S64);
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
- .legalIf(isRegisterClassType(0))
+ .legalIf(isRegisterClassType(ST, 0))
// s1 and s16 are special cases because they have legal operations on
// them, but don't really occupy registers in the normal way.
.legalFor({S1, S16})
@@ -1779,7 +1785,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
unsigned IdxTypeIdx = 2;
getActionDefinitionsBuilder(Op)
- .customIf([=](const LegalityQuery &Query) {
+ .customIf([=](const LegalityQuery &Query) {
const LLT EltTy = Query.Types[EltTypeIdx];
const LLT VecTy = Query.Types[VecTypeIdx];
const LLT IdxTy = Query.Types[IdxTypeIdx];
@@ -1800,36 +1806,37 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
IdxTy.getSizeInBits() == 32 &&
isLegalVecType;
})
- .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltNarrowerThan(VecTypeIdx, 32)),
- bitcastToVectorElement32(VecTypeIdx))
- //.bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
- .bitcastIf(
- all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltWiderThan(VecTypeIdx, 64)),
- [=](const LegalityQuery &Query) {
- // For > 64-bit element types, try to turn this into a 64-bit
- // element vector since we may be able to do better indexing
- // if this is scalar. If not, fall back to 32.
- const LLT EltTy = Query.Types[EltTypeIdx];
- const LLT VecTy = Query.Types[VecTypeIdx];
- const unsigned DstEltSize = EltTy.getSizeInBits();
- const unsigned VecSize = VecTy.getSizeInBits();
-
- const unsigned TargetEltSize = DstEltSize % 64 == 0 ? 64 : 32;
- return std::pair(
- VecTypeIdx,
- LLT::fixed_vector(VecSize / TargetEltSize, TargetEltSize));
- })
- .clampScalar(EltTypeIdx, S32, S64)
- .clampScalar(VecTypeIdx, S32, S64)
- .clampScalar(IdxTypeIdx, S32, S32)
- .clampMaxNumElements(VecTypeIdx, S32, 32)
- // TODO: Clamp elements for 64-bit vectors?
- .moreElementsIf(
- isIllegalRegisterType(VecTypeIdx),
- moreElementsToNextExistingRegClass(VecTypeIdx))
- // It should only be necessary with variable indexes.
- // As a last resort, lower to the stack
- .lower();
+ .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx),
+ scalarOrEltNarrowerThan(VecTypeIdx, 32)),
+ bitcastToVectorElement32(VecTypeIdx))
+ //.bitcastIf(vectorSmallerThan(1, 32), bitcastToScalar(1))
+ .bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx),
+ scalarOrEltWiderThan(VecTypeIdx, 64)),
+ [=](const LegalityQuery &Query) {
+ // For > 64-bit element types, try to turn this into a
+ // 64-bit element vector since we may be able to do better
+ // indexing if this is scalar. If not, fall back to 32.
+ const LLT EltTy = Query.Types[EltTypeIdx];
+ const LLT VecTy = Query.Types[VecTypeIdx];
+ const unsigned DstEltSize = EltTy.getSizeInBits();
+ const unsigned VecSize = VecTy.getSizeInBits();
+
+ const unsigned TargetEltSize =
+ DstEltSize % 64 == 0 ? 64 : 32;
+ return std::pair(VecTypeIdx,
+ LLT::fixed_vector(VecSize / TargetEltSize,
+ TargetEltSize));
+ })
+ .clampScalar(EltTypeIdx, S32, S64)
+ .clampScalar(VecTypeIdx, S32, S64)
+ .clampScalar(IdxTypeIdx, S32, S32)
+ .clampMaxNumElements(VecTypeIdx, S32, 32)
+ // TODO: Clamp elements for 64-bit vectors?
+ .moreElementsIf(isIllegalRegisterType(ST, VecTypeIdx),
+ moreElementsToNextExistingRegClass(VecTypeIdx))
+ // It should only be necessary with variable indexes.
+ // As a last resort, lower to the stack
+ .lower();
}
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -1876,15 +1883,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
- auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalForCartesianProduct(AllS32Vectors, {S32})
- .legalForCartesianProduct(AllS64Vectors, {S64})
- .clampNumElements(0, V16S32, V32S32)
- .clampNumElements(0, V2S64, V16S64)
- .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16))
- .moreElementsIf(
- isIllegalRegisterType(0),
- moreElementsToNextExistingRegClass(0));
+ auto &BuildVector =
+ getActionDefinitionsBuilder(G_BUILD_VECTOR)
+ .legalForCartesianProduct(AllS32Vectors, {S32})
+ .legalForCartesianProduct(AllS64Vectors, {S64})
+ .clampNumElements(0, V16S32, V32S32)
+ .clampNumElements(0, V2S64, V16S64)
+ .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16))
+ .moreElementsIf(isIllegalRegisterType(ST, 0),
+ moreElementsToNextExistingRegClass(0));
if (ST.hasScalarPackInsts()) {
BuildVector
@@ -1904,14 +1911,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.lower();
}
- BuildVector.legalIf(isRegisterType(0));
+ BuildVector.legalIf(isRegisterType(ST, 0));
// FIXME: Clamp maximum size
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
- .legalIf(all(isRegisterType(0), isRegisterType(1)))
- .clampMaxNumElements(0, S32, 32)
- .clampMaxNumElements(1, S16, 2) // TODO: Make 4?
- .clampMaxNumElements(0, S16, 64);
+ .legalIf(all(isRegisterType(ST, 0), isRegisterType(ST, 1)))
+ .clampMaxNumElements(0, S32, 32)
+ .clampMaxNumElements(1, S16, 2) // TODO: Make 4?
+ .clampMaxNumElements(0, S16, 64);
getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();
@@ -1932,34 +1939,40 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return false;
};
- auto &Builder = getActionDefinitionsBuilder(Op)
- .legalIf(all(isRegisterType(0), isRegisterType(1)))
- .lowerFor({{S16, V2S16}})
- .lowerIf([=](const LegalityQuery &Query) {
- const LLT BigTy = Query.Types[BigTyIdx];
- return BigTy.getSizeInBits() == 32;
- })
- // Try to widen to s16 first for small types.
- // TODO: Only do this on targets with legal s16 shifts
- .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, S16)
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
- .moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
- .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32),
- elementTypeIs(1, S16)),
- changeTo(1, V2S16))
- // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
- // worth considering the multiples of 64 since 2*192 and 2*384 are not
- // valid.
- .clampScalar(LitTyIdx, S32, S512)
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
- // Break up vectors with weird elements into scalars
- .fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, LitTyIdx); },
- scalarize(0))
- .fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, BigTyIdx); },
- scalarize(1))
- .clampScalar(BigTyIdx, S32, MaxScalar);
+ auto &Builder =
+ getActionDefinitionsBuilder(Op)
+ .legalIf(all(isRegisterType(ST, 0), isRegisterType(ST, 1)))
+ .lowerFor({{S16, V2S16}})
+ .lowerIf([=](const LegalityQuery &Query) {
+ const LLT BigTy = Query.Types[BigTyIdx];
+ return BigTy.getSizeInBits() == 32;
+ })
+ // Try to widen to s16 first for small types.
+ // TODO: Only do this on targets with legal s16 shifts
+ .minScalarOrEltIf(scalarNarrowerThan(LitTyIdx, 16), LitTyIdx, S16)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
+ .moreElementsIf(isSmallOddVector(BigTyIdx),
+ oneMoreElement(BigTyIdx))
+ .fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32),
+ elementTypeIs(1, S16)),
+ changeTo(1, V2S16))
+ // Clamp the little scalar to s8-s256 and make it a power of 2. It's
+ // not worth considering the multiples of 64 since 2*192 and 2*384
+ // are not valid.
+ .clampScalar(LitTyIdx, S32, S512)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
+ // Break up vectors with weird elements into scalars
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return notValidElt(Query, LitTyIdx);
+ },
+ scalarize(0))
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ return notValidElt(Query, BigTyIdx);
+ },
+ scalarize(1))
+ .clampScalar(BigTyIdx, S32, MaxScalar);
if (Op == G_MERGE_VALUES) {
Builder.widenScalarIf(
@@ -3146,7 +3159,7 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
} else {
// Extract the subvector.
- if (isRegisterType(ValTy)) {
+ if (isRegisterType(ST, ValTy)) {
// If this a case where G_EXTRACT is legal, use it.
// (e.g. <3 x s32> -> <4 x s32>)
WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
index 2d8dc9d47225e..1c1a6dac75a17 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -11,7 +11,7 @@ def SGPRRegBank : RegisterBank<"SGPR",
>;
def VGPRRegBank : RegisterBank<"VGPR",
- [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024]
+ [VGPR_16_Lo128, VGPR_16, VGPR_32, VReg_64, VReg_96, VReg_128, VReg_160, VReg_192, VReg_224, VReg_256, VReg_288, VReg_320, VReg_352, VReg_384, VReg_512, VReg_1024]
>;
// It is helpful to distinguish conditions from ordinary SGPRs.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 71c720ed09b5f..e365690f8b4dc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -35,7 +35,7 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
cl::ReallyHidden,
cl::init(true));
-std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
+std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
@@ -351,9 +351,9 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
static auto InitializeRegSplitPartsOnce = [this]() {
for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
unsigned Size = getSubRegIdxSize(Idx);
- if (Size & 31)
+ if (Size & 15)
continue;
- std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
+ std::vector<int16_t> &Vec = RegSplitParts[Size / 16 - 1];
unsigned Pos = getSubRegIdxOffset(Idx);
if (Pos % Size)
continue;
@@ -3554,14 +3554,14 @@ bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const {
const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
- assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
+ assert(RegBitWidth >= 32 && RegBitWidth <= 1024 && EltSize >= 2);
- const unsigned RegDWORDs = RegBitWidth / 32;
- const unsigned EltDWORDs = EltSize / 4;
- assert(RegSplitParts.size() + 1 >= EltDWORDs);
+ const unsigned RegHalves = RegBitWidth / 16;
+ const unsigned EltHalves = EltSize / 2;
+ assert(RegSplitParts.size() + 1 >= EltHalves);
- const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
- const unsigned NumParts = RegDWORDs / EltDWORDs;
+ const std::vector<int16_t> &Parts = RegSplitParts[EltHalves - 1];
+ const unsigned NumParts = RegHalves / EltHalves;
return ArrayRef(Parts.data(), NumParts);
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index a434efb70d052..a64180daea2ad 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -37,11 +37,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
BitVector RegPressureIgnoredUnits;
/// Sub reg indexes for getRegSplitParts.
- /// First index represents subreg size from 1 to 16 DWORDs.
+ /// First index represents subreg size from 1 to 32 Half DWORDS.
/// The inner vector is sorted by bit offset.
/// Provided a register can be fully split with given subregs,
/// all elements of the inner vector combined give a full lane mask.
- static std::array<std::vector<int16_t>, 16> RegSplitParts;
+ static std::array<std::vector<int16_t>, 32> RegSplitParts;
// Table representing sub reg of given width and offset.
// First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c521d0dd3ad2d..6a92e54b69edc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2483,6 +2483,8 @@ bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
switch (RCID) {
+ case AMDGPU::VGPR_16RegClassID:
+ case AMDGPU::VGPR_16_Lo128RegClassID:
case AMDGPU::SGPR_LO16RegClassID:
case AMDGPU::AGPR_LO16RegClassID:
return 16;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index 3d7fec9a5986c..2389924b82484 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -3,7 +3,8 @@
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefix...
[truncated]
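The legalizer changes above thread the GCNSubtarget into the register-size and register-type predicates so that a 16-bit scalar counts as a register size only when real True16 instructions are available. The following is a minimal, self-contained model of that predicate for illustration only; the Subtarget struct is a hypothetical stand-in, not the real GCNSubtarget.

// Standalone sketch of the subtarget-aware isRegisterSize predicate from the
// AMDGPULegalizerInfo.cpp hunk above. "Subtarget" is a hypothetical stand-in.
#include <cstdio>

struct Subtarget {
  bool RealTrue16Insts;
  bool useRealTrue16Insts() const { return RealTrue16Insts; }
};

constexpr unsigned MaxRegisterSize = 1024;

static bool isRegisterSize(const Subtarget &ST, unsigned Size) {
  // 16 bits is a valid register size only with real True16 instructions;
  // otherwise the size must be a multiple of 32 bits, up to the maximum.
  return ((ST.useRealTrue16Insts() && Size == 16) || Size % 32 == 0) &&
         Size <= MaxRegisterSize;
}

int main() {
  Subtarget True16{true}, Legacy{false};
  std::printf("%d %d %d\n",
              isRegisterSize(True16, 16),  // 1: legal with True16
              isRegisterSize(Legacy, 16),  // 0: not legal without True16
              isRegisterSize(True16, 48)); // 0: not a 32-bit multiple
}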
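Similarly, the SIRegisterInfo.cpp change above re-indexes RegSplitParts in 16-bit halves instead of 32-bit DWORDs so that 16-bit (True16) subregisters get a slot. Below is a self-contained model of the new part-count arithmetic; the helper name numSplitParts is illustrative, not from the patch, and it assumes the same preconditions as the assert in the diff.

// Standalone model of the getRegSplitParts sizing arithmetic after the change:
// widths are counted in 16-bit halves rather than 32-bit DWORDs.
#include <cassert>
#include <cstdio>

// RegBitWidth in bits, EltSize in bytes (EltSize >= 2, i.e. at least 16 bits).
static unsigned numSplitParts(unsigned RegBitWidth, unsigned EltSize) {
  assert(RegBitWidth >= 32 && RegBitWidth <= 1024 && EltSize >= 2);
  const unsigned RegHalves = RegBitWidth / 16;
  const unsigned EltHalves = EltSize / 2;
  return RegHalves / EltHalves; // number of EltSize-sized parts in the register
}

int main() {
  std::printf("%u\n", numSplitParts(64, 2));  // 4: 64-bit register in 16-bit parts
  std::printf("%u\n", numSplitParts(64, 4));  // 2: 64-bit register in 32-bit parts
  std::printf("%u\n", numSplitParts(128, 8)); // 2: 128-bit register in 64-bit parts
}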
@llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen). Changes: same patch summary and diff as the comment above.
Review comment on the .bitcastIf hunk in AMDGPULegalizerInfo.cpp: some of the changes in this patch are pure clang-format changes.
Force-pushed from e6e1151 to c99756b.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/17703 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/13796 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/13743 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/13987 Here is the relevant piece of the build log for the reference
There is a test failure after this patch was merged. It seems to be caused by updates in another PR. Getting a quick fix up right now.
Set up a PR for the test fix and waiting for CI to pass: #128784
Hi @broxigarchen, not sure if the buildbot notified you, but I'm seeing 5 test failures on a bot which I suspect were due to your change. Can you take a look?
Hi yung, yes, I am updating it right now. Thanks for the heads up.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/11954 Here is the relevant piece of the build log for the reference
…e16 selection (#128784) This is an NFC change. Update the test file and fix the build. llvm/llvm-project#128233 is causing a build issue; this is caused by PR llvm/llvm-project#127945 being merged while #128233 was pending review.
Breaks a lot of bots: https://lab.llvm.org/buildbot/#/builders/52/builds/6332
Sorry, I see the fix.
The fix has been merged here: #128784
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/20458 Here is the relevant piece of the build log for the reference
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/23923 Here is the relevant piece of the build log for the reference
Enable gisel selection for uaddsat and usubsat in the true16 flow.
This patch includes:
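For reference, here is a minimal, self-contained sketch of what the two operations compute: unsigned saturating add and subtract on 16-bit values. It models only the intrinsic semantics (llvm.uadd.sat / llvm.usub.sat on i16), not the GlobalISel selection code from the patch, and the helper names uaddsat16/usubsat16 are illustrative.

// Reference semantics of 16-bit unsigned saturating add/subtract, the
// operations this patch enables GlobalISel to select as True16 instructions.
#include <cstdint>
#include <cstdio>

static uint16_t uaddsat16(uint16_t a, uint16_t b) {
  uint32_t Sum = uint32_t(a) + uint32_t(b);
  return Sum > 0xFFFFu ? uint16_t(0xFFFF) : uint16_t(Sum); // clamp to UINT16_MAX
}

static uint16_t usubsat16(uint16_t a, uint16_t b) {
  return a > b ? uint16_t(a - b) : uint16_t(0); // clamp to 0
}

int main() {
  std::printf("%u\n", unsigned(uaddsat16(0xFFF0, 0x0020))); // 65535 (saturated)
  std::printf("%u\n", unsigned(usubsat16(0x0010, 0x0020))); // 0 (saturated)
}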