intel
diff --git a/‎.ci/generate-buildkite-pipeline-premerge
Lines changed: 4 additions & 1 deletion b/‎.ci/generate-buildkite-pipeline-premerge
Lines changed: 4 additions & 1 deletion
diff --git a/‎.github/workflows/approved-prs.yml
Lines changed: 39 additions & 0 deletions b/‎.github/workflows/approved-prs.yml
Lines changed: 39 additions & 0 deletions
diff --git a/‎clang/docs/LanguageExtensions.rst
Lines changed: 33 additions & 0 deletions b/‎clang/docs/LanguageExtensions.rst
Lines changed: 33 additions & 0 deletions
diff --git a/‎clang/docs/ReleaseNotes.rst
Lines changed: 5 additions & 0 deletions b/‎clang/docs/ReleaseNotes.rst
Lines changed: 5 additions & 0 deletions
diff --git a/‎clang/include/clang/Basic/Builtins.h
Lines changed: 0 additions & 4 deletions b/‎clang/include/clang/Basic/Builtins.h
Lines changed: 0 additions & 4 deletions
diff --git a/‎clang/include/clang/Basic/Builtins.td
Lines changed: 6 additions & 0 deletions b/‎clang/include/clang/Basic/Builtins.td
Lines changed: 6 additions & 0 deletions
diff --git a/‎clang/include/clang/Driver/Options.td
Lines changed: 3 additions & 3 deletions b/‎clang/include/clang/Driver/Options.td
Lines changed: 3 additions & 3 deletions
diff --git a/‎clang/lib/Basic/Targets/AArch64.cpp
Lines changed: 14 additions & 2 deletions b/‎clang/lib/Basic/Targets/AArch64.cpp
Lines changed: 14 additions & 2 deletions
diff --git a/‎clang/lib/Basic/Targets/ARM.cpp
Lines changed: 11 additions & 3 deletions b/‎clang/lib/Basic/Targets/ARM.cpp
Lines changed: 11 additions & 3 deletions
diff --git a/‎clang/lib/Basic/Targets/ARM.h
Lines changed: 1 addition & 0 deletions b/‎clang/lib/Basic/Targets/ARM.h
Lines changed: 1 addition & 0 deletions
diff --git a/‎clang/lib/CodeGen/CGBuiltin.cpp
Lines changed: 4 additions & 0 deletions b/‎clang/lib/CodeGen/CGBuiltin.cpp
Lines changed: 4 additions & 0 deletions
diff --git a/‎clang/lib/CodeGen/CGCall.cpp
Lines changed: 25 additions & 25 deletions b/‎clang/lib/CodeGen/CGCall.cpp
Lines changed: 25 additions & 25 deletions
diff --git a/‎clang/lib/CodeGen/CGExprScalar.cpp
Lines changed: 25 additions & 26 deletions b/‎clang/lib/CodeGen/CGExprScalar.cpp
Lines changed: 25 additions & 26 deletions
@@ -233,7 +233,10 @@ linux_projects=$(add-dependencies ${linux_projects_to_test} | sort | uniq)
 
 windows_projects_to_test=$(exclude-windows $(compute-projects-to-test ${modified_projects}))
 windows_check_targets=$(check-targets ${windows_projects_to_test} | sort | uniq)
-windows_projects=$(add-dependencies ${windows_projects_to_test} | sort | uniq)
+# Temporary disable the windows job.
+# See https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840
+#windows_projects=$(add-dependencies ${windows_projects_to_test} | sort | uniq)
+windows_projects=""
 
 # Generate the appropriate pipeline
 if [[ "${linux_projects}" != "" ]]; then
 
@@ -0,0 +1,39 @@
+name: "Prompt reviewers to merge PRs on behalf of authors"
+
+permissions:
+  contents: read
+
+on:
+  pull_request_review:
+    types:
+      - submitted
+
+jobs:
+  merge-on-behalf-information-comment:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+    if: >-
+      (github.repository == 'llvm/llvm-project') &&
+      (github.event.review.state == 'APPROVED')
+    steps:
+      - name: Checkout Automation Script
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: llvm/utils/git/
+          ref: main
+
+      - name: Setup Automation Script
+        working-directory: ./llvm/utils/git/
+        run: |
+          pip install -r requirements.txt
+
+      - name: Add Merge On Behalf Comment
+        working-directory: ./llvm/utils/git/
+        run: |
+          python3 ./github-automation.py \
+            --token '${{ secrets.GITHUB_TOKEN }}' \
+            pr-merge-on-behalf-information \
+            --issue-number "${{ github.event.pull_request.number }}" \
+            --author "${{ github.event.pull_request.user.login }}" \
+            --reviewer "${{ github.event.review.user.login }}"
@@ -2764,6 +2764,39 @@ Query for this feature with ``__has_builtin(__builtin_readcyclecounter)``. Note
 that even if present, its use may depend on run-time privilege or other OS
 controlled state.
 
+``__builtin_readsteadycounter``
+-------------------------------
+
+``__builtin_readsteadycounter`` is used to access the fixed frequency counter
+register (or a similar steady-rate clock) on those targets that support it.
+The function is similar to ``__builtin_readcyclecounter`` above except that the
+frequency is fixed, making it suitable for measuring elapsed time.
+
+**Syntax**:
+
+.. code-block:: c++
+
+  __builtin_readsteadycounter()
+
+**Example of Use**:
+
+.. code-block:: c++
+
+  unsigned long long t0 = __builtin_readsteadycounter();
+  do_something();
+  unsigned long long t1 = __builtin_readsteadycounter();
+  unsigned long long secs_to_do_something = (t1 - t0) / tick_rate;
+
+**Description**:
+
+The ``__builtin_readsteadycounter()`` builtin returns the frequency counter value.
+When not supported by the target, the return value is always zero. This builtin
+takes no arguments and produces an unsigned long long result. The builtin does 
+not guarantee any particular frequency, only that it is stable. Knowledge of the 
+counter's true frequency will need to be provided by the user.
+
+Query for this feature with ``__has_builtin(__builtin_readsteadycounter)``.
+
 ``__builtin_dump_struct``
 -------------------------
 
 
@@ -117,6 +117,9 @@ C23 Feature Support
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
+- Added ``__builtin_readsteadycounter`` for reading fixed frequency hardware
+  counters.
+
 New Compiler Flags
 ------------------
 
@@ -255,6 +258,8 @@ X86 Support
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
 
+- Fixed the incorrect definition of the __ARM_ARCH macro for architectures greater than or equal to v8.1.
+
 Android Support
 ^^^^^^^^^^^^^^^
 
 
@@ -20,10 +20,6 @@
 #include "llvm/ADT/StringRef.h"
 #include <cstring>
 
-// VC++ defines 'alloca' as an object-like macro, which interferes with our
-// builtins.
-#undef alloca
-
 namespace clang {
 class TargetInfo;
 class IdentifierTable;
 
@@ -1110,6 +1110,12 @@ def ReadCycleCounter : Builtin {
   let Prototype = "unsigned long long int()";
 }
 
+def ReadSteadyCounter : Builtin {
+  let Spellings = ["__builtin_readsteadycounter"];
+  let Attributes = [NoThrow];
+  let Prototype = "unsigned long long int()";
+}
+
 def Trap : Builtin {
   let Spellings = ["__builtin_trap"];
   let Attributes = [NoThrow, NoReturn];
 
@@ -5612,7 +5612,7 @@ def print_rocm_search_dirs : Flag<["-", "--"], "print-rocm-search-dirs">,
   HelpText<"Print the paths used for finding ROCm installation">,
   Visibility<[ClangOption, CLOption]>;
 def print_runtime_dir : Flag<["-", "--"], "print-runtime-dir">,
-  HelpText<"Print the directory pathname containing clangs runtime libraries">,
+  HelpText<"Print the directory pathname containing Clang's runtime libraries">,
   Visibility<[ClangOption, CLOption]>;
 def print_diagnostic_options : Flag<["-", "--"], "print-diagnostic-options">,
   HelpText<"Print all of Clang's warning options">,
@@ -8860,8 +8860,8 @@ def _SLASH_ZW : CLJoined<"ZW">;
 // clang-dxc Options
 //===----------------------------------------------------------------------===//
 
-def dxc_Group : OptionGroup<"<clang-dxc options>">, Visibility<[DXCOption]>,
-  HelpText<"dxc compatibility options">;
+def dxc_Group : OptionGroup<"clang-dxc options">, Visibility<[DXCOption]>,
+  HelpText<"dxc compatibility options.">;
 class DXCFlag<string name> : Option<["/", "-"], name, KIND_FLAG>,
   Group<dxc_Group>, Visibility<[DXCOption]>;
 class DXCJoinedOrSeparate<string name> : Option<["/", "-"], name,
 
@@ -367,8 +367,20 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
 
   // ACLE predefines. Many can only have one possible value on v8 AArch64.
   Builder.defineMacro("__ARM_ACLE", "200");
-  Builder.defineMacro("__ARM_ARCH",
-                      std::to_string(ArchInfo->Version.getMajor()));
+
+  // __ARM_ARCH is defined as an integer value indicating the current ARM ISA.
+  // For ISAs up to and including v8, __ARM_ARCH is equal to the major version
+  // number. For ISAs from v8.1 onwards, __ARM_ARCH is scaled up to include the
+  // minor version number, e.g. for ARM architecture ARMvX.Y:
+  // __ARM_ARCH = X * 100 + Y.
+  if (ArchInfo->Version.getMajor() == 8 && ArchInfo->Version.getMinor() == 0)
+    Builder.defineMacro("__ARM_ARCH",
+                        std::to_string(ArchInfo->Version.getMajor()));
+  else
+    Builder.defineMacro("__ARM_ARCH",
+                        std::to_string(ArchInfo->Version.getMajor() * 100 +
+                                       ArchInfo->Version.getMinor().value()));
+
   Builder.defineMacro("__ARM_ARCH_PROFILE",
                       std::string("'") + (char)ArchInfo->Profile + "'");
 
 
@@ -130,6 +130,7 @@ void ARMTargetInfo::setArchInfo(llvm::ARM::ArchKind Kind) {
   SubArch = llvm::ARM::getSubArch(ArchKind);
   ArchProfile = llvm::ARM::parseArchProfile(SubArch);
   ArchVersion = llvm::ARM::parseArchVersion(SubArch);
+  ArchMinorVersion = llvm::ARM::parseArchMinorVersion(SubArch);
 
   // cache CPU related strings
   CPUAttr = getCPUAttr();
@@ -736,9 +737,16 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
   if (!CPUAttr.empty())
     Builder.defineMacro("__ARM_ARCH_" + CPUAttr + "__");
 
-  // ACLE 6.4.1 ARM/Thumb instruction set architecture
-  // __ARM_ARCH is defined as an integer value indicating the current ARM ISA
-  Builder.defineMacro("__ARM_ARCH", Twine(ArchVersion));
+  // __ARM_ARCH is defined as an integer value indicating the current ARM ISA.
+  // For ISAs up to and including v8, __ARM_ARCH is equal to the major version
+  // number. For ISAs from v8.1 onwards, __ARM_ARCH is scaled up to include the
+  // minor version number, e.g. for ARM architecture ARMvX.Y:
+  // __ARM_ARCH = X * 100 + Y.
+  if (ArchVersion >= 9 || ArchMinorVersion != 0)
+    Builder.defineMacro("__ARM_ARCH",
+                        Twine(ArchVersion * 100 + ArchMinorVersion));
+  else
+    Builder.defineMacro("__ARM_ARCH", Twine(ArchVersion));
 
   if (ArchVersion >= 8) {
     // ACLE 6.5.7 Crypto Extension
 
@@ -60,6 +60,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
   llvm::ARM::ArchKind ArchKind = llvm::ARM::ArchKind::ARMV4T;
   llvm::ARM::ProfileKind ArchProfile;
   unsigned ArchVersion;
+  unsigned ArchMinorVersion;
 
   LLVM_PREFERRED_TYPE(FPUMode)
   unsigned FPU : 5;
 
@@ -3523,6 +3523,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
     return RValue::get(Builder.CreateCall(F));
   }
+  case Builtin::BI__builtin_readsteadycounter: {
+    Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
+    return RValue::get(Builder.CreateCall(F));
+  }
   case Builtin::BI__builtin___clear_cache: {
     Value *Begin = EmitScalarExpr(E->getArg(0));
     Value *End = EmitScalarExpr(E->getArg(1));
 
@@ -1310,27 +1310,25 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
   // If coercing a fixed vector to a scalable vector for ABI compatibility, and
   // the types match, use the llvm.vector.insert intrinsic to perform the
   // conversion.
-  if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
-    if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
-      // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+  if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(Ty)) {
+    if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      // If we are casting a fixed i8 vector to a scalable i1 predicate
       // vector, use a vector insert and bitcast the result.
-      bool NeedsBitcast = false;
-      auto PredType =
-          llvm::ScalableVectorType::get(CGF.Builder.getInt1Ty(), 16);
-      llvm::Type *OrigType = Ty;
-      if (ScalableDst == PredType &&
-          FixedSrc->getElementType() == CGF.Builder.getInt8Ty()) {
-        ScalableDst = llvm::ScalableVectorType::get(CGF.Builder.getInt8Ty(), 2);
-        NeedsBitcast = true;
+      if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+          ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+          FixedSrcTy->getElementType()->isIntegerTy(8)) {
+        ScalableDstTy = llvm::ScalableVectorType::get(
+            FixedSrcTy->getElementType(),
+            ScalableDstTy->getElementCount().getKnownMinValue() / 8);
       }
-      if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
+      if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) {
         auto *Load = CGF.Builder.CreateLoad(Src);
-        auto *UndefVec = llvm::UndefValue::get(ScalableDst);
+        auto *UndefVec = llvm::UndefValue::get(ScalableDstTy);
         auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
         llvm::Value *Result = CGF.Builder.CreateInsertVector(
-            ScalableDst, UndefVec, Load, Zero, "cast.scalable");
-        if (NeedsBitcast)
-          Result = CGF.Builder.CreateBitCast(Result, OrigType);
+            ScalableDstTy, UndefVec, Load, Zero, "cast.scalable");
+        if (ScalableDstTy != Ty)
+          Result = CGF.Builder.CreateBitCast(Result, Ty);
         return Result;
       }
     }
@@ -3281,13 +3279,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         llvm::Value *Coerced = Fn->getArg(FirstIRArg);
         if (auto *VecTyFrom =
                 dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
-          // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+          // If we are casting a scalable i1 predicate vector to a fixed i8
           // vector, bitcast the source and use a vector extract.
-          auto PredType =
-              llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-          if (VecTyFrom == PredType &&
+          if (VecTyFrom->getElementType()->isIntegerTy(1) &&
+              VecTyFrom->getElementCount().isKnownMultipleOf(8) &&
               VecTyTo->getElementType() == Builder.getInt8Ty()) {
-            VecTyFrom = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
+            VecTyFrom = llvm::ScalableVectorType::get(
+                VecTyTo->getElementType(),
+                VecTyFrom->getElementCount().getKnownMinValue() / 8);
             Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
           }
           if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
@@ -5984,12 +5983,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       // If coercing a fixed vector from a scalable vector for ABI
       // compatibility, and the types match, use the llvm.vector.extract
       // intrinsic to perform the conversion.
-      if (auto *FixedDst = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
+      if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(RetIRTy)) {
         llvm::Value *V = CI;
-        if (auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(V->getType())) {
-          if (FixedDst->getElementType() == ScalableSrc->getElementType()) {
+        if (auto *ScalableSrcTy =
+                dyn_cast<llvm::ScalableVectorType>(V->getType())) {
+          if (FixedDstTy->getElementType() == ScalableSrcTy->getElementType()) {
             llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
-            V = Builder.CreateExtractVector(FixedDst, V, Zero, "cast.fixed");
+            V = Builder.CreateExtractVector(FixedDstTy, V, Zero, "cast.fixed");
             return RValue::get(V);
           }
         }
 
@@ -2163,26 +2163,24 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     // If Src is a fixed vector and Dst is a scalable vector, and both have the
     // same element type, use the llvm.vector.insert intrinsic to perform the
     // bitcast.
-    if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
-      if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
-        // If we are casting a fixed i8 vector to a scalable 16 x i1 predicate
+    if (auto *FixedSrcTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (auto *ScalableDstTy = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+        // If we are casting a fixed i8 vector to a scalable i1 predicate
         // vector, use a vector insert and bitcast the result.
-        bool NeedsBitCast = false;
-        auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-        llvm::Type *OrigType = DstTy;
-        if (ScalableDst == PredType &&
-            FixedSrc->getElementType() == Builder.getInt8Ty()) {
-          DstTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
-          ScalableDst = cast<llvm::ScalableVectorType>(DstTy);
-          NeedsBitCast = true;
+        if (ScalableDstTy->getElementType()->isIntegerTy(1) &&
+            ScalableDstTy->getElementCount().isKnownMultipleOf(8) &&
+            FixedSrcTy->getElementType()->isIntegerTy(8)) {
+          ScalableDstTy = llvm::ScalableVectorType::get(
+              FixedSrcTy->getElementType(),
+              ScalableDstTy->getElementCount().getKnownMinValue() / 8);
         }
-        if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
-          llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
+        if (FixedSrcTy->getElementType() == ScalableDstTy->getElementType()) {
+          llvm::Value *UndefVec = llvm::UndefValue::get(ScalableDstTy);
           llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
           llvm::Value *Result = Builder.CreateInsertVector(
-              DstTy, UndefVec, Src, Zero, "cast.scalable");
-          if (NeedsBitCast)
-            Result = Builder.CreateBitCast(Result, OrigType);
+              ScalableDstTy, UndefVec, Src, Zero, "cast.scalable");
+          if (Result->getType() != DstTy)
+            Result = Builder.CreateBitCast(Result, DstTy);
           return Result;
         }
       }
@@ -2191,18 +2189,19 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     // If Src is a scalable vector and Dst is a fixed vector, and both have the
     // same element type, use the llvm.vector.extract intrinsic to perform the
     // bitcast.
-    if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
-      if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
-        // If we are casting a scalable 16 x i1 predicate vector to a fixed i8
+    if (auto *ScalableSrcTy = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (auto *FixedDstTy = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+        // If we are casting a scalable i1 predicate vector to a fixed i8
         // vector, bitcast the source and use a vector extract.
-        auto PredType = llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
-        if (ScalableSrc == PredType &&
-            FixedDst->getElementType() == Builder.getInt8Ty()) {
-          SrcTy = llvm::ScalableVectorType::get(Builder.getInt8Ty(), 2);
-          ScalableSrc = cast<llvm::ScalableVectorType>(SrcTy);
-          Src = Builder.CreateBitCast(Src, SrcTy);
+        if (ScalableSrcTy->getElementType()->isIntegerTy(1) &&
+            ScalableSrcTy->getElementCount().isKnownMultipleOf(8) &&
+            FixedDstTy->getElementType()->isIntegerTy(8)) {
+          ScalableSrcTy = llvm::ScalableVectorType::get(
+              FixedDstTy->getElementType(),
+              ScalableSrcTy->getElementCount().getKnownMinValue() / 8);
+          Src = Builder.CreateBitCast(Src, ScalableSrcTy);
         }
-        if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
+        if (ScalableSrcTy->getElementType() == FixedDstTy->getElementType()) {
           llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
           return Builder.CreateExtractVector(DstTy, Src, Zero, "cast.fixed");
         }