Merge branch 'release/20.x' into bitcast-fix-llvm20

yonghong-song · web-flow · commit b5ba7d3e04ac · 2025-03-14T19:49:42.000-07:00
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -1058,6 +1058,7 @@ Bug Fixes to C++ Support
 - Fixed a substitution bug in transforming CTAD aliases when the type alias contains a non-pack template argument
   corresponding to a pack parameter (#GH124715)
 - Clang is now better at keeping track of friend function template instance contexts. (#GH55509)
+- Fixed an integer overflow bug in computing template parameter depths when synthesizing CTAD guides. (#GH128691)
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp
@@ -377,8 +377,15 @@ struct ConvertConstructorToDeductionGuideTransform {
         if (NestedPattern)
           Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth());
         auto [Depth, Index] = getDepthAndIndex(Param);
+        // Depth can still be 0 if FTD belongs to an explicit class template
+        // specialization with an empty template parameter list. In that case,
+        // we don't want the NewDepth to overflow, and it should remain 0.
+        assert(Depth ||
+               cast<ClassTemplateSpecializationDecl>(FTD->getDeclContext())
+                   ->isExplicitSpecialization());
         NamedDecl *NewParam = transformTemplateParameter(
-            SemaRef, DC, Param, Args, Index + Depth1IndexAdjustment, Depth - 1);
+            SemaRef, DC, Param, Args, Index + Depth1IndexAdjustment,
+            Depth ? Depth - 1 : 0);
         if (!NewParam)
           return nullptr;
         // Constraints require that we substitute depth-1 arguments
diff --git a/clang/test/SemaTemplate/deduction-guide.cpp b/clang/test/SemaTemplate/deduction-guide.cpp
@@ -691,3 +691,35 @@ Test test(42);
 // CHECK-NEXT: | `-ParmVarDecl {{.*}} 'auto:1'
 
 } // namespace GH122134
+
+namespace GH128691 {
+
+template <typename = void>
+class NewDeleteAllocator;
+
+template <>
+struct NewDeleteAllocator<> {
+  template <typename T>
+  NewDeleteAllocator(T); // expected-note {{candidate template ignored}} \
+                         // expected-note {{implicit deduction guide declared as}}
+};
+
+template <typename>
+struct NewDeleteAllocator : NewDeleteAllocator<> { // expected-note {{candidate template ignored}} \
+                                                   // expected-note {{implicit deduction guide declared as}}
+  using NewDeleteAllocator<>::NewDeleteAllocator;
+};
+
+void test() { NewDeleteAllocator abc(42); } // expected-error {{no viable constructor or deduction guide}}
+
+// CHECK-LABEL: Dumping GH128691::<deduction guide for NewDeleteAllocator>:
+// CHECK-NEXT: FunctionTemplateDecl {{.+}} <deduction guide for NewDeleteAllocator>
+// CHECK-NEXT: |-TemplateTypeParmDecl {{.+}} typename depth 0 index 0
+// CHECK-NEXT: | `-TemplateArgument type 'void'
+// CHECK-NEXT: |   |-inherited from TemplateTypeParm {{.+}} depth 0 index 0
+// CHECK-NEXT: |   `-BuiltinType {{.+}} 'void'
+// CHECK-NEXT: |-TemplateTypeParmDecl {{.+}} typename depth 0 index 1 T
+// CHECK-NEXT: `-CXXDeductionGuideDecl {{.+}} <deduction guide for NewDeleteAllocator> 'auto (T) -> NewDeleteAllocator<type-parameter-0-0>'
+// CHECK-NEXT:  `-ParmVarDecl {{.+}} 'T'
+
+} // namespace GH128691
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -10231,6 +10231,11 @@ static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
 void SystemZTargetLowering::
 verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const Function *F, SDValue Callee) const {
+  // Temporarily only do the check when explicitly requested, until it can be
+  // enabled by default.
+  if (!EnableIntArgExtCheck)
+    return;
+
   bool IsInternal = false;
   const Function *CalleeFn = nullptr;
   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
@@ -10252,6 +10257,11 @@ verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
 void SystemZTargetLowering::
 verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const Function *F) const {
+  // Temporarily only do the check when explicitly requested, until it can be
+  // enabled by default.
+  if (!EnableIntArgExtCheck)
+    return;
+
   if (!verifyNarrowIntegerArgs(Outs, isFullyInternal(F))) {
     errs() << "ERROR: Missing extension attribute of returned "
            << "value from function:\n";
@@ -10268,11 +10278,6 @@ verifyNarrowIntegerArgs(const SmallVectorImpl<ISD::OutputArg> &Outs,
   if (IsInternal || !Subtarget.isTargetELF())
     return true;
 
-  // Temporarily only do the check when explicitly requested, until it can be
-  // enabled by default.
-  if (!EnableIntArgExtCheck)
-    return true;
-
   if (EnableIntArgExtCheck.getNumOccurrences()) {
     if (!EnableIntArgExtCheck)
       return true;
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -638,17 +638,19 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
       (EnableMemCpyOptWithoutLibcalls ||
        (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
     MemoryLocation LoadLoc = MemoryLocation::get(LI);
-    MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
-                   *StoreAccess = MSSA->getMemoryAccess(SI);
-
-    // We use MSSA to check if an instruction may store to the memory we load
-    // from in between the load and the store. If such an instruction is found,
-    // we try to promote there instead of at the store position.
-    auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
-        StoreAccess->getDefiningAccess(), LoadLoc, BAA);
-    Instruction *P = MSSA->dominates(LoadAccess, Clobber)
-                         ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
-                         : SI;
+
+    // We use alias analysis to check if an instruction may store to
+    // the memory we load from in between the load and the store. If
+    // such an instruction is found, we try to promote there instead
+    // of at the store position.
+    // TODO: Can use MSSA for this.
+    Instruction *P = SI;
+    for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
+      if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
+        P = &I;
+        break;
+      }
+    }
 
     // If we found an instruction that may write to the loaded memory,
     // we can try to promote at this position instead of the store
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -51,8 +51,8 @@ define void @destroysrc(ptr %src, ptr %dst) {
 
 define void @destroynoaliassrc(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @destroynoaliassrc(
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST:%.*]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -79,9 +79,9 @@ define void @copyalias(ptr %src, ptr %dst) {
 ; sure we lift the computation as well if needed and possible.
 define void @addrproducer(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @addrproducer(
-; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST]], i64 1
+; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i64 1
 ; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST:%.*]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[DST]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -113,8 +113,8 @@ define void @noaliasaddrproducer(ptr %src, ptr noalias %dst, ptr noalias %dstidp
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[DSTIDPTR:%.*]], align 4
 ; CHECK-NEXT:    [[DSTINDEX:%.*]] = or i32 [[TMP2]], 1
 ; CHECK-NEXT:    [[DST2:%.*]] = getelementptr [[S:%.*]], ptr [[DST:%.*]], i32 [[DSTINDEX]]
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC]], i64 16, i1 false)
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC:%.*]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DST2]], ptr align 8 [[SRC:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load %S, ptr %src
@@ -130,7 +130,7 @@ define void @throwing_call(ptr noalias %src, ptr %dst) {
 ; CHECK-LABEL: @throwing_call(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load [[S:%.*]], ptr [[SRC:%.*]], align 8
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[SRC]], i8 0, i64 16, i1 false)
-; CHECK-NEXT:    call void @call() [[ATTR2:#.*]]
+; CHECK-NEXT:    call void @call() #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    store [[S]] [[TMP1]], ptr [[DST:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -156,4 +156,30 @@ loop:
   br label %loop
 }
 
+; There are multiple instructions that can clobber the source memory here.
+; We can move the dest write past the store to %ptr.24, but not the memcpy.
+; Make sure we don't perform fca2memcpy conversion in this case.
+define void @multiple_clobbering(ptr %ptr, ptr %ptr.copy) {
+; CHECK-LABEL: @multiple_clobbering(
+; CHECK-NEXT:    [[PTR_8:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR:%.*]], i64 8
+; CHECK-NEXT:    [[PTR_24:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 24
+; CHECK-NEXT:    [[PTR_32:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 32
+; CHECK-NEXT:    [[PTR_COPY_8:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_COPY:%.*]], i64 8
+; CHECK-NEXT:    [[STRUCT:%.*]] = load { i32, i64 }, ptr [[PTR_COPY_8]], align 8
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[PTR_8]], ptr [[PTR_32]], i64 12, i1 false)
+; CHECK-NEXT:    store i64 1, ptr [[PTR_24]], align 8
+; CHECK-NEXT:    store { i32, i64 } [[STRUCT]], ptr [[PTR_32]], align 8
+; CHECK-NEXT:    ret void
+;
+  %ptr.8 = getelementptr inbounds nuw i8, ptr %ptr, i64 8
+  %ptr.24 = getelementptr inbounds nuw i8, ptr %ptr, i64 24
+  %ptr.32 = getelementptr inbounds nuw i8, ptr %ptr, i64 32
+  %ptr.copy.8 = getelementptr inbounds nuw i8, ptr %ptr.copy, i64 8
+  %struct = load { i32, i64 }, ptr %ptr.copy.8, align 8
+  call void @llvm.memcpy.p0.p0.i64(ptr %ptr.8, ptr %ptr.32, i64 12, i1 false)
+  store i64 1, ptr %ptr.24, align 8
+  store { i32, i64 } %struct, ptr %ptr.32, align 8
+  ret void
+}
+
 declare void @call()