Skip to content

Commit 4fba6d2

Browse files
SC llvm team
authored and committed
Merged main:f42ce1621f5f4129fb37c4a1af958e1d47344107 into amd-gfx:9e1c87f1791b
Local branch amd-gfx 9e1c87f Merged main:5a4ca51a91ff into amd-gfx:5377955b9b42
Remote branch main f42ce16 [mlir][sve][nfc] Update a test to use transform-interpreter (llvm#73771)
2 parents 9e1c87f + f42ce16 commit 4fba6d2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+5610
-659
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,3 +314,11 @@ let TargetGuard = "sme2" in {
314314

315315
def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsSharedZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>;
316316
}
317+
318+
//
319+
// Spill and fill of ZT0
320+
//
321+
let TargetGuard = "sme2" in {
322+
def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
323+
def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
324+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
7+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
8+
9+
#include <arm_sme_draft_spec_subject_to_change.h>
10+
11+
// LDR ZT0
12+
13+
// CHECK-LABEL: @test_svldr_zt(
14+
// CHECK-NEXT: entry:
15+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]])
16+
// CHECK-NEXT: ret void
17+
//
18+
// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv(
19+
// CPP-CHECK-NEXT: entry:
20+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]])
21+
// CPP-CHECK-NEXT: ret void
22+
//
23+
void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za {
24+
svldr_zt(0, base);
25+
}
26+
27+
// STR ZT0
28+
29+
// CHECK-LABEL: @test_svstr_zt(
30+
// CHECK-NEXT: entry:
31+
// CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]])
32+
// CHECK-NEXT: ret void
33+
//
34+
// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv(
35+
// CPP-CHECK-NEXT: entry:
36+
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]])
37+
// CPP-CHECK-NEXT: ret void
38+
//
39+
void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za {
40+
svstr_zt(0, base);
41+
}

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
2-
// RUN: -target-feature +sve2 -target-feature +sme2 -target-feature +sve -fsyntax-only -verify %s
2+
// RUN: -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -fsyntax-only -verify %s
33

44
// REQUIRES: aarch64-registered-target
55

@@ -19,3 +19,8 @@ void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t
1919
svbmops_za32_u32_m(4, pred, pred, u32, u32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
2020
svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
2121
}
22+
23+
void test_ldr_str_zt(const void *const_base, void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za {
24+
svldr_zt(1, const_base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
25+
svstr_zt(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
26+
}

libc/src/__support/CMakeLists.txt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,6 @@ add_header_library(
1010
libc.src.__support.CPP.new
1111
)
1212

13-
add_header_library(
14-
named_pair
15-
HDRS
16-
named_pair.h
17-
)
18-
1913
add_header_library(
2014
common
2115
HDRS
@@ -40,7 +34,6 @@ add_header_library(
4034
HDRS
4135
math_extras.h
4236
DEPENDS
43-
.named_pair
4437
libc.src.__support.CPP.type_traits
4538
libc.src.__support.macros.attributes
4639
libc.src.__support.macros.config
@@ -187,7 +180,6 @@ add_header_library(
187180
HDRS
188181
number_pair.h
189182
DEPENDS
190-
.named_pair
191183
libc.src.__support.CPP.type_traits
192184
)
193185

libc/src/__support/math_extras.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,17 @@
1010
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H
1111
#define LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H
1212

13-
#include "named_pair.h"
1413
#include "src/__support/CPP/type_traits.h"
1514
#include "src/__support/macros/attributes.h" // LIBC_INLINE
1615
#include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
1716

1817
namespace LIBC_NAMESPACE {
1918

2019
// Add with carry
21-
DEFINE_NAMED_PAIR_TEMPLATE(SumCarry, sum, carry);
20+
template <typename T> struct SumCarry {
21+
T sum;
22+
T carry;
23+
};
2224

2325
// This version is always valid for constexpr.
2426
template <typename T>
@@ -91,7 +93,10 @@ add_with_carry<unsigned long long>(unsigned long long a, unsigned long long b,
9193
#endif // LIBC_HAS_BUILTIN(__builtin_addc)
9294

9395
// Subtract with borrow
94-
DEFINE_NAMED_PAIR_TEMPLATE(DiffBorrow, diff, borrow);
96+
template <typename T> struct DiffBorrow {
97+
T diff;
98+
T borrow;
99+
};
95100

96101
// This version is always valid for constexpr.
97102
template <typename T>

libc/src/__support/named_pair.h

Lines changed: 0 additions & 18 deletions
This file was deleted.

libc/src/__support/number_pair.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@
1010
#define LLVM_LIBC_SRC___SUPPORT_NUMBER_PAIR_H
1111

1212
#include "CPP/type_traits.h"
13-
#include "named_pair.h"
1413

1514
#include <stddef.h>
1615

1716
namespace LIBC_NAMESPACE {
1817

19-
DEFINE_NAMED_PAIR_TEMPLATE(NumberPair, lo, hi);
18+
template <typename T> struct NumberPair {
19+
T lo;
20+
T hi;
21+
};
2022

2123
template <typename T>
2224
cpp::enable_if_t<cpp::is_integral_v<T> && cpp::is_unsigned_v<T>, NumberPair<T>>

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 482423
19+
#define LLVM_MAIN_REVISION 482429
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,9 +326,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
326326
return false;
327327
}
328328

329-
template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
329+
template <unsigned BaseReg, unsigned Max>
330+
bool ImmToTile(SDValue N, SDValue &Imm) {
330331
if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
331332
uint64_t C = CI->getZExtValue();
333+
334+
if (C > Max)
335+
return false;
336+
332337
Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
333338
return true;
334339
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2753,6 +2753,20 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
27532753
return BB;
27542754
}
27552755

2756+
MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI,
2757+
MachineBasicBlock *BB,
2758+
bool IsSpill) const {
2759+
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2760+
MachineInstrBuilder MIB;
2761+
unsigned Opc = IsSpill ? AArch64::STR_TX : AArch64::LDR_TX;
2762+
auto Rs = IsSpill ? RegState::Kill : RegState::Define;
2763+
MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
2764+
MIB.addReg(MI.getOperand(0).getReg(), Rs);
2765+
MIB.add(MI.getOperand(1)); // Base
2766+
MI.eraseFromParent(); // The pseudo is gone now.
2767+
return BB;
2768+
}
2769+
27562770
MachineBasicBlock *
27572771
AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg,
27582772
MachineInstr &MI,
@@ -2869,6 +2883,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
28692883
return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
28702884
case AArch64::LDR_ZA_PSEUDO:
28712885
return EmitFill(MI, BB);
2886+
case AArch64::LDR_TX_PSEUDO:
2887+
return EmitZTSpillFill(MI, BB, /*IsSpill=*/false);
2888+
case AArch64::STR_TX_PSEUDO:
2889+
return EmitZTSpillFill(MI, BB, /*IsSpill=*/true);
28722890
case AArch64::ZERO_M_PSEUDO:
28732891
return EmitZero(MI, BB);
28742892
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,8 @@ class AArch64TargetLowering : public TargetLowering {
623623
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
624624
MachineInstr &MI, MachineBasicBlock *BB,
625625
bool HasTile) const;
626+
MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB,
627+
bool IsSpill) const;
626628
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
627629

628630
MachineBasicBlock *

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,12 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
440440
Reserved.set(SubReg);
441441
}
442442

443+
if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) {
444+
for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true);
445+
SubReg.isValid(); ++SubReg)
446+
Reserved.set(*SubReg);
447+
}
448+
443449
markSuperRegs(Reserved, AArch64::FPCR);
444450

445451
if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -541,8 +541,8 @@ defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops
541541

542542
def ZERO_T : sme2_zero_zt<"zero", 0b0001>;
543543

544-
def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>;
545-
def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>;
544+
defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>;
545+
defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>;
546546

547547
def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
548548
def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13-
def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
14-
def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
15-
def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
16-
def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
17-
def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
13+
def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0, 0>", []>;
14+
def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0, 1>", []>;
15+
def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0, 3>", []>;
16+
def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0, 7>", []>;
17+
def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0, 15>", []>;
18+
def imm_to_zt : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZT0, 0>", []>;
1819

1920
def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
2021
def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>;
@@ -3137,6 +3138,18 @@ class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
31373138
let mayStore = opc{7};
31383139
}
31393140

3141+
3142+
multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc, SDPatternOperator op> {
3143+
def NAME : sme2_spill_fill_vector<mnemonic, opc>;
3144+
def NAME # _PSEUDO
3145+
: Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> {
3146+
// Translated to actual instruction in AArch64ISelLowering.cpp
3147+
let usesCustomInserter = 1;
3148+
}
3149+
def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base),
3150+
(!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>;
3151+
}
3152+
31403153
//===----------------------------------------------------------------------===///
31413154
// SME2 move to/from lookup table
31423155
class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>

llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,10 +1115,10 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
11151115
case X86II::MRMSrcMem4VOp3: {
11161116
// Instruction format for 4VOp3:
11171117
// src1(ModR/M), MemAddr, src3(VEX_4V)
1118-
Prefix.setR(MI, CurOp++);
1118+
Prefix.setRR2(MI, CurOp++);
11191119
Prefix.setBB2(MI, MemOperand + X86::AddrBaseReg);
11201120
Prefix.setXX2(MI, MemOperand + X86::AddrIndexReg);
1121-
Prefix.set4V(MI, CurOp + X86::AddrNumOperands);
1121+
Prefix.set4VV2(MI, CurOp + X86::AddrNumOperands);
11221122
break;
11231123
}
11241124
case X86II::MRMSrcMemOp4: {
@@ -1189,7 +1189,7 @@ X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
11891189
// src1(ModR/M), src2(ModR/M), src3(VEX_4V)
11901190
Prefix.setRR2(MI, CurOp++);
11911191
Prefix.setBB2(MI, CurOp++);
1192-
Prefix.set4V(MI, CurOp++);
1192+
Prefix.set4VV2(MI, CurOp++);
11931193
break;
11941194
}
11951195
case X86II::MRMSrcRegOp4: {

0 commit comments

Comments
 (0)