ROCm
diff --git a/‎clang/test/Driver/aarch64-sve.c
Lines changed: 4 additions & 5 deletions b/‎clang/test/Driver/aarch64-sve.c
Lines changed: 4 additions & 5 deletions
diff --git a/‎clang/test/Preprocessor/aarch64-target-features.c
Lines changed: 1 addition & 1 deletion b/‎clang/test/Preprocessor/aarch64-target-features.c
Lines changed: 1 addition & 1 deletion
diff --git a/‎lld/COFF/Config.h
Lines changed: 4 additions & 4 deletions b/‎lld/COFF/Config.h
Lines changed: 4 additions & 4 deletions
diff --git a/‎lld/COFF/Driver.cpp
Lines changed: 2 additions & 0 deletions b/‎lld/COFF/Driver.cpp
Lines changed: 2 additions & 0 deletions
diff --git a/‎lld/COFF/DriverUtils.cpp
Lines changed: 12 additions & 2 deletions b/‎lld/COFF/DriverUtils.cpp
Lines changed: 12 additions & 2 deletions
diff --git a/‎lld/test/COFF/exportas.test
Lines changed: 88 additions & 0 deletions b/‎lld/test/COFF/exportas.test
Lines changed: 88 additions & 0 deletions
diff --git a/‎llvm/docs/ReleaseNotes.rst
Lines changed: 1 addition & 0 deletions b/‎llvm/docs/ReleaseNotes.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/include/llvm/IR/Verifier.h
Lines changed: 1 addition & 0 deletions b/‎llvm/include/llvm/IR/Verifier.h
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/lib/IR/Verifier.cpp
Lines changed: 32 additions & 0 deletions b/‎llvm/lib/IR/Verifier.cpp
Lines changed: 32 additions & 0 deletions
diff --git a/‎llvm/lib/Object/COFFImportFile.cpp
Lines changed: 2 additions & 2 deletions b/‎llvm/lib/Object/COFFImportFile.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Lines changed: 15 additions & 5 deletions b/‎llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Lines changed: 15 additions & 5 deletions
diff --git a/‎llvm/lib/TargetParser/AArch64TargetParser.cpp
Lines changed: 0 additions & 5 deletions b/‎llvm/lib/TargetParser/AArch64TargetParser.cpp
Lines changed: 0 additions & 5 deletions
diff --git a/‎llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Lines changed: 14 additions & 14 deletions b/‎llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
Lines changed: 14 additions & 14 deletions
@@ -6,12 +6,11 @@
 // RUN: %clang --target=aarch64 -march=armv8.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV8A-NOSVE %s
 // GENERICV8A-NOSVE-NOT: "-target-feature" "+sve"
 
-// The 32-bit floating point matrix multiply extension is enabled by default
-// for armv8.6-a targets (or later) with SVE, and can optionally be enabled for
-// any target from armv8.2a onwards (we don't enforce not using it with earlier
-// targets).
+// The 32-bit floating point matrix multiply extension is an optional feature
+// that can be used for any target from armv8.2a onwards. It can be enabled
+// using the `+f32mm` option.
 // RUN: %clang --target=aarch64 -march=armv8.6a       -### -c %s 2>&1 | FileCheck -check-prefix=NO-F32MM %s
-// RUN: %clang --target=aarch64 -march=armv8.6a+sve   -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
+// RUN: %clang --target=aarch64 -march=armv8.6a+sve+f32mm   -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
 // RUN: %clang --target=aarch64 -march=armv8.5a+f32mm -### -c %s 2>&1 | FileCheck -check-prefix=F32MM %s
 // NO-F32MM-NOT: "-target-feature" "+f32mm"
 // F32MM: "-target-feature" "+f32mm"
 
@@ -196,7 +196,7 @@
 // CHECK-8_6-NOT: __ARM_FEATURE_SHA3 1
 // CHECK-8_6-NOT: __ARM_FEATURE_SM4 1
 
-// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.6-a+sve -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_6 %s
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8.6-a+sve+f32mm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE-8_6 %s
 // CHECK-SVE-8_6: __ARM_FEATURE_SVE 1
 // CHECK-SVE-8_6: __ARM_FEATURE_SVE_BF16 1
 // CHECK-SVE-8_6: __ARM_FEATURE_SVE_MATMUL_FP32 1
 
@@ -54,6 +54,7 @@ enum class EmitKind { Obj, LLVM, ASM };
 struct Export {
   StringRef name;       // N in /export:N or /export:E=N
   StringRef extName;    // E in /export:E=N
+  StringRef exportAs;   // E in /export:N,EXPORTAS,E
   StringRef aliasTarget; // GNU specific: N in "alias == N"
   Symbol *sym = nullptr;
   uint16_t ordinal = 0;
@@ -73,10 +74,9 @@ struct Export {
   StringRef exportName; // Name in DLL
 
   bool operator==(const Export &e) const {
-    return (name == e.name && extName == e.extName &&
-            aliasTarget == e.aliasTarget &&
-            ordinal == e.ordinal && noname == e.noname &&
-            data == e.data && isPrivate == e.isPrivate);
+    return (name == e.name && extName == e.extName && exportAs == e.exportAs &&
+            aliasTarget == e.aliasTarget && ordinal == e.ordinal &&
+            noname == e.noname && data == e.data && isPrivate == e.isPrivate);
   }
 };
 
 
@@ -945,6 +945,7 @@ void LinkerDriver::createImportLibrary(bool asLib) {
     e2.Name = std::string(e1.name);
     e2.SymbolName = std::string(e1.symbolName);
     e2.ExtName = std::string(e1.extName);
+    e2.ExportAs = std::string(e1.exportAs);
     e2.AliasTarget = std::string(e1.aliasTarget);
     e2.Ordinal = e1.ordinal;
     e2.Noname = e1.noname;
@@ -1044,6 +1045,7 @@ void LinkerDriver::parseModuleDefs(StringRef path) {
       e2.name = saver().save(e1.Name);
       e2.extName = saver().save(e1.ExtName);
     }
+    e2.exportAs = saver().save(e1.ExportAs);
     e2.aliasTarget = saver().save(e1.AliasTarget);
     e2.ordinal = e1.Ordinal;
     e2.noname = e1.Noname;
 
@@ -585,7 +585,8 @@ Export LinkerDriver::parseExport(StringRef arg) {
     }
   }
 
-  // Optional parameters "[,@ordinal[,NONAME]][,DATA][,PRIVATE]"
+  // Optional parameters
+  // "[,@ordinal[,NONAME]][,DATA][,PRIVATE][,EXPORTAS,exportname]"
   while (!rest.empty()) {
     StringRef tok;
     std::tie(tok, rest) = rest.split(",");
@@ -607,6 +608,13 @@ Export LinkerDriver::parseExport(StringRef arg) {
       e.isPrivate = true;
       continue;
     }
+    if (tok.equals_insensitive("exportas")) {
+      if (!rest.empty() && !rest.contains(','))
+        e.exportAs = rest;
+      else
+        error("invalid EXPORTAS value: " + rest);
+      break;
+    }
     if (tok.starts_with("@")) {
       int32_t ord;
       if (tok.substr(1).getAsInteger(0, ord))
@@ -683,7 +691,9 @@ void LinkerDriver::fixupExports() {
   }
 
   for (Export &e : ctx.config.exports) {
-    if (!e.forwardTo.empty()) {
+    if (!e.exportAs.empty()) {
+      e.exportName = e.exportAs;
+    } else if (!e.forwardTo.empty()) {
       e.exportName = undecorate(ctx, e.name);
     } else {
       e.exportName = undecorate(ctx, e.extName.empty() ? e.name : e.extName);
 
@@ -9,6 +9,77 @@ RUN: lld-link -out:out1.dll -dll -noentry test.obj test.lib
 RUN: llvm-readobj --coff-imports out1.dll | FileCheck --check-prefix=IMPORT %s
 IMPORT: Symbol: expfunc
 
+Pass -export argument with EXPORTAS.
+
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows func.s -o func.obj
+RUN: lld-link -out:out2.dll -dll -noentry func.obj -export:func,EXPORTAS,expfunc
+RUN: llvm-readobj --coff-exports out2.dll | FileCheck --check-prefix=EXPORT %s
+EXPORT: Name: expfunc
+
+RUN: llvm-readobj out2.lib | FileCheck --check-prefix=IMPLIB %s
+IMPLIB:      Name type: export as
+IMPLIB-NEXT: Export name: expfunc
+IMPLIB-NEXT: Symbol: __imp_func
+IMPLIB-NEXT: Symbol: func
+
+Use .drectve section with EXPORTAS.
+
+RUN: llvm-mc -filetype=obj -triple=x86_64-windows drectve.s -o drectve.obj
+RUN: lld-link -out:out3.dll -dll -noentry func.obj drectve.obj
+RUN: llvm-readobj --coff-exports out3.dll | FileCheck --check-prefix=EXPORT %s
+RUN: llvm-readobj out3.lib | FileCheck --check-prefix=IMPLIB %s
+
+Use a .def file with EXPORTAS.
+
+RUN: lld-link -out:out4.dll -dll -noentry func.obj -def:test.def
+RUN: llvm-readobj --coff-exports out4.dll | FileCheck --check-prefix=EXPORT %s
+RUN: llvm-readobj out4.lib | FileCheck --check-prefix=IMPLIB %s
+
+Use a .def file with EXPORTAS in a forwarding export.
+
+RUN: lld-link -out:out5.dll -dll -noentry func.obj -def:test2.def
+RUN: llvm-readobj --coff-exports out5.dll | FileCheck --check-prefix=FORWARD-EXPORT %s
+FORWARD-EXPORT:      Export {
+FORWARD-EXPORT-NEXT:   Ordinal: 1
+FORWARD-EXPORT-NEXT:   Name: expfunc
+FORWARD-EXPORT-NEXT:   ForwardedTo: otherdll.otherfunc
+FORWARD-EXPORT-NEXT: }
+
+RUN: llvm-readobj out5.lib | FileCheck --check-prefix=FORWARD-IMPLIB %s
+FORWARD-IMPLIB:      Name type: export as
+FORWARD-IMPLIB-NEXT: Export name: expfunc
+FORWARD-IMPLIB-NEXT: Symbol: __imp_func
+FORWARD-IMPLIB-NEXT: Symbol: func
+
+Pass -export argument with EXPORTAS in a forwarding export.
+
+RUN: lld-link -out:out6.dll -dll -noentry func.obj -export:func=otherdll.otherfunc,EXPORTAS,expfunc
+RUN: llvm-readobj --coff-exports out6.dll | FileCheck --check-prefix=FORWARD-EXPORT %s
+RUN: llvm-readobj out6.lib | FileCheck --check-prefix=FORWARD-IMPLIB %s
+
+Pass -export argument with EXPORTAS in a data export.
+
+RUN: lld-link -out:out7.dll -dll -noentry func.obj -export:func,DATA,@5,EXPORTAS,expfunc
+RUN: llvm-readobj --coff-exports out7.dll | FileCheck --check-prefix=ORD %s
+ORD:      Ordinal: 5
+ORD-NEXT: Name: expfunc
+
+RUN: llvm-readobj out7.lib | FileCheck --check-prefix=ORD-IMPLIB %s
+ORD-IMPLIB:      Type: data
+ORD-IMPLIB-NEXT: Name type: export as
+ORD-IMPLIB-NEXT: Export name: expfunc
+ORD-IMPLIB-NEXT: Symbol: __imp_func
+
+Check invalid EXPORTAS syntax.
+
+RUN: not lld-link -out:err1.dll -dll -noentry func.obj -export:func,EXPORTAS, 2>&1 | \
+RUN:     FileCheck --check-prefix=ERR1 %s
+ERR1: error: invalid EXPORTAS value: {{$}}
+
+RUN: not lld-link -out:err2.dll -dll -noentry func.obj -export:func,EXPORTAS,expfunc,DATA 2>&1 | \
+RUN:     FileCheck --check-prefix=ERR2 %s
+ERR2: error: invalid EXPORTAS value: expfunc,DATA
+
 #--- test.s
     .section ".test", "rd"
     .rva __imp_func
@@ -17,3 +88,20 @@ IMPORT: Symbol: expfunc
 LIBRARY test.dll
 EXPORTS
     func EXPORTAS expfunc
+
+#--- test2.def
+LIBRARY test.dll
+EXPORTS
+    func=otherdll.otherfunc EXPORTAS expfunc
+
+#--- func.s
+    .text
+    .globl func
+    .p2align 2, 0x0
+func:
+    movl $1, %eax
+    retq
+
+#--- drectve.s
+    .section .drectve, "yn"
+    .ascii " -export:func,EXPORTAS,expfunc"
@@ -76,6 +76,7 @@ Changes to the AMDGPU Backend
 
 Changes to the ARM Backend
 --------------------------
+* FEAT_F32MM is no longer activated by default when using ``+sve`` on v8.6-A or later. The feature is still available and can be used by adding ``+f32mm`` to the command line options.
 
 Changes to the AVR Backend
 --------------------------
 
@@ -77,6 +77,7 @@ class TBAAVerifier {
   /// Visit an instruction and return true if it is valid, return false if an
   /// invalid TBAA is attached.
   bool visitTBAAMetadata(Instruction &I, const MDNode *MD);
+  bool visitTBAAStructMetadata(Instruction &I, const MDNode *MD);
 };
 
 /// Check a function for errors, useful for use when debugging a
 
@@ -5172,6 +5172,9 @@ void Verifier::visitInstruction(Instruction &I) {
   if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa))
     TBAAVerifyHelper.visitTBAAMetadata(I, TBAA);
 
+  if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa_struct))
+    TBAAVerifyHelper.visitTBAAStructMetadata(I, TBAA);
+
   if (MDNode *MD = I.getMetadata(LLVMContext::MD_noalias))
     visitAliasScopeListMetadata(MD);
   if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope))
@@ -7526,6 +7529,35 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
   return true;
 }
 
+bool TBAAVerifier::visitTBAAStructMetadata(Instruction &I, const MDNode *MD) {
+  CheckTBAA(MD->getNumOperands() % 3 == 0,
+            "tbaa.struct operands must occur in groups of three", &I, MD);
+
+  // Each group of three operands is (offset, size, tag): two constant integers
+  // and a tbaa node. Every region must start at or after the end of the
+  // previous one, so regions must be listed in ascending order without overlap.
+  std::optional<APInt> NextFree; // Offset + size of the previous group, if any.
+  for (unsigned int Idx = 0; Idx < MD->getNumOperands(); Idx += 3) {
+    auto *OffsetCI =
+        mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx));
+    CheckTBAA(OffsetCI, "Offset must be a constant integer", &I, MD);
+
+    auto *SizeCI =
+        mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(Idx + 1));
+    CheckTBAA(SizeCI, "Size must be a constant integer", &I, MD);
+
+    MDNode *TBAA = dyn_cast_or_null<MDNode>(MD->getOperand(Idx + 2));
+    CheckTBAA(TBAA, "TBAA tag missing", &I, MD);
+    visitTBAAMetadata(I, TBAA); // Return value is not checked here.
+
+    bool NonOverlapping = !NextFree || NextFree->ule(OffsetCI->getValue());
+    CheckTBAA(NonOverlapping, "Overlapping tbaa.struct regions", &I, MD);
+
+    NextFree = OffsetCI->getValue() + SizeCI->getValue(); // End of region.
+  }
+  return true;
+}
+
 char VerifierLegacyPass::ID = 0;
 INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
 
 
@@ -690,12 +690,12 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path,
       if (ImportType == IMPORT_CODE && isArm64EC(M)) {
         if (std::optional<std::string> MangledName =
                 getArm64ECMangledFunctionName(Name)) {
-          if (ExportName.empty()) {
+          if (!E.Noname && ExportName.empty()) {
             NameType = IMPORT_NAME_EXPORTAS;
             ExportName.swap(Name);
           }
           Name = std::move(*MangledName);
-        } else if (ExportName.empty()) {
+        } else if (!E.Noname && ExportName.empty()) {
           NameType = IMPORT_NAME_EXPORTAS;
           ExportName = std::move(*getArm64ECDemangledFunctionName(Name));
         }
 
@@ -909,23 +909,33 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
   if (!IsTypeLegal)
     return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
 
+  std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);
+
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
-  // FIXME: Need to consider vsetvli and lmul.
   int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
                 (int)Log2_32(Src->getScalarSizeInBits());
   switch (ISD) {
   case ISD::SIGN_EXTEND:
-  case ISD::ZERO_EXTEND:
-    if (Src->getScalarSizeInBits() == 1) {
+  case ISD::ZERO_EXTEND: {
+    const unsigned SrcEltSize = Src->getScalarSizeInBits();
+    if (SrcEltSize == 1) {
       // We do not use vsext/vzext to extend from mask vector.
       // Instead we use the following instructions to extend from mask vector:
       // vmv.v.i v8, 0
       // vmerge.vim v8, v8, -1, v0
-      return 2;
+      return getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM},
+                                     DstLT.second, CostKind);
     }
-    return 1;
+    if ((PowDiff < 1) || (PowDiff > 3))
+      return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
+    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
+    unsigned Op =
+        (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
+    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
+  }
   case ISD::TRUNCATE:
     if (Dst->getScalarSizeInBits() == 1) {
       // We do not use several vncvt to truncate to mask vector. So we could
 
@@ -186,11 +186,6 @@ void AArch64::ExtensionSet::enable(ArchExtKind E) {
   // Special cases for dependencies which vary depending on the base
   // architecture version.
   if (BaseArch) {
-    // +sve implies +f32mm if the base architecture is v8.6A+ or v9.1A+
-    // It isn't the case in general that sve implies both f64mm and f32mm
-    if (E == AEK_SVE && BaseArch->is_superset(ARMV8_6A))
-      enable(AEK_F32MM);
-
     // +fp16 implies +fp16fml for v8.4A+, but not v9.0-A+
     if (E == AEK_FP16 && BaseArch->is_superset(ARMV8_4A) &&
         !BaseArch->is_superset(ARMV9A))
 
@@ -611,6 +611,18 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
       Y->getType() == Z->getType())
     return createPowiExpr(I, *this, X, Y, Z);
 
+  // powi(X, Y) / X --> powi(X, Y-1)
+  // This is legal when (Y - 1) can't wrap around; reassoc and nnan are also
+  // required.
+  // TODO: Multi-use may also be better off creating powi(X, Y-1)
+  if (I.hasAllowReassoc() && I.hasNoNaNs() &&
+      match(Op0, m_OneUse(m_AllowReassoc(m_Intrinsic<Intrinsic::powi>(
+                     m_Specific(Op1), m_Value(Y))))) &&
+      willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
+    Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
+    return createPowiExpr(I, *this, Op1, Y, NegOne);
+  }
+
   return nullptr;
 }
 
@@ -1904,20 +1916,8 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
     return replaceInstUsesWith(I, Pow);
   }
 
-  // powi(X, Y) / X --> powi(X, Y-1)
-  // This is legal when (Y - 1) can't wraparound, in which case reassoc and nnan
-  // are required.
-  // TODO: Multi-use may be also better off creating Powi(x,y-1)
-  if (I.hasAllowReassoc() && I.hasNoNaNs() &&
-      match(Op0, m_OneUse(m_Intrinsic<Intrinsic::powi>(m_Specific(Op1),
-                                                       m_Value(Y)))) &&
-      willNotOverflowSignedSub(Y, ConstantInt::get(Y->getType(), 1), I)) {
-    Constant *NegOne = ConstantInt::getAllOnesValue(Y->getType());
-    Value *Y1 = Builder.CreateAdd(Y, NegOne);
-    Type *Types[] = {Op1->getType(), Y1->getType()};
-    Value *Pow = Builder.CreateIntrinsic(Intrinsic::powi, Types, {Op1, Y1}, &I);
-    return replaceInstUsesWith(I, Pow);
-  }
+  if (Instruction *FoldedPowi = foldPowiReassoc(I))
+    return FoldedPowi;
 
   return nullptr;
 }