Skip to content

Commit 3a0e00d

Browse files
committed
WIP: Attempt vector truncstore
1 parent e5cb18b commit 3a0e00d

18 files changed

+212
-173
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1394,6 +1394,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13941394
}
13951395
}
13961396

1397+
setTruncStoreAction(MVT::v1i64, MVT::v1i8, Legal);
1398+
13971399
for (auto Op :
13981400
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
13991401
ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
@@ -24067,7 +24069,7 @@ static SDValue performSTORECombine(SDNode *N,
2406724069
SDValue ExtIdx = Value.getOperand(1);
2406824070
EVT VectorVT = Vector.getValueType();
2406924071
EVT ElemVT = VectorVT.getVectorElementType();
24070-
if (!ValueVT.isInteger() || ElemVT == MVT::i8 || MemVT == MVT::i8)
24072+
if (!ValueVT.isInteger())
2407124073
return SDValue();
2407224074
if (ValueVT != MemVT && !ST->isTruncatingStore())
2407324075
return SDValue();
@@ -24085,6 +24087,21 @@ static SDValue performSTORECombine(SDNode *N,
2408524087
return SDValue();
2408624088
}
2408724089

24090+
if (MemVT == MVT::i8) {
24091+
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
24092+
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24093+
Value.getValueType(), Vector, ExtIdx);
24094+
SDValue ExtVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v8i8,
24095+
DAG.getUNDEF(MVT::v8i8), Ext, Zero);
24096+
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::v1i64, ExtVector);
24097+
return DAG.getTruncStore(ST->getChain(), DL, Cast, ST->getBasePtr(),
24098+
MVT::v1i8, ST->getMemOperand());
24099+
}
24100+
24101+
// TODO: Handle storing i8s to wider types.
24102+
if (ElemVT == MVT::i8)
24103+
return SDValue();
24104+
2408824105
EVT FPElemVT = EVT::getFloatingPointVT(ElemVT.getSizeInBits());
2408924106
EVT FPVectorVT = VectorVT.changeVectorElementType(FPElemVT);
2409024107
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, FPVectorVT, Vector);
@@ -28826,6 +28843,10 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
2882628843
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
2882728844
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
2882828845

28846+
// Can be lowered to a bsub store in ISEL.
28847+
if (VT == MVT::v1i64 && MemVT == MVT::v1i8)
28848+
return SDValue();
28849+
2882928850
if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
2883028851
EVT TruncVT = ContainerVT.changeVectorElementType(
2883128852
Store->getMemoryVT().getVectorElementType());

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4451,8 +4451,6 @@ multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop
44514451
}
44524452

44534453
let AddedComplexity = 19 in {
4454-
defm : VecStoreLane0Pat<am_indexed8, truncstorei8, v16i8, i32, vi8, bsub, uimm12s2, STRBui>;
4455-
defm : VecStoreLane0Pat<am_indexed8, truncstorei8, v4i32, i32, vi8, bsub, uimm12s2, STRBui>;
44564454
defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
44574455
defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
44584456
defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
@@ -4591,6 +4589,18 @@ def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
45914589
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
45924590
(STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
45934591

4592+
// v1i64 -> bsub truncating stores
4593+
// Supporting patterns: lower an FPR32/FPR64 scalar inserted into lane 0 of an undef v8i8
4594+
def : Pat<(v8i8 (vector_insert (v8i8 (undef)), (i32 FPR32:$src), 0)),
4595+
(INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
4596+
def : Pat<(v8i8 (vector_insert (v8i8 (undef)), (i64 FPR64:$src), 0)),
4597+
(v8i8 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub), dsub))>;
4598+
// Lower v1i64 -> v1i8 truncstore to bsub store
4599+
def : Pat<(truncstorevi8 v1i64:$VT, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
4600+
(STURBi (vi8 (EXTRACT_SUBREG v1i64:$VT, bsub)), GPR64sp:$Rn, simm9:$offset)>;
4601+
def : Pat<(truncstorevi8 v1i64:$VT, (am_indexed8 GPR64sp:$Rn, uimm12s4:$offset)),
4602+
(STRBui (vi8 (EXTRACT_SUBREG v1i64:$VT, bsub)), GPR64sp:$Rn, uimm12s4:$offset)>;
4603+
45944604
// Match stores from lane 0 to the appropriate subreg's store.
45954605
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
45964606
ValueType VTy, ValueType STy,
@@ -4600,7 +4610,6 @@ multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
46004610
}
46014611

46024612
let AddedComplexity = 19 in {
4603-
defm : VecStoreULane0Pat<truncstorei8, v16i8, i32, vi8, bsub, STURBi>;
46044613
defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
46054614
defm : VecStoreULane0Pat<store, v8f16, f16, f16, hsub, STURHi>;
46064615
defm : VecStoreULane0Pat<store, v4i32, i32, i32, ssub, STURSi>;
@@ -7103,6 +7112,15 @@ def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
71037112
(i64 0)),
71047113
dsub)>;
71057114

7115+
def : Pat<(v8i8 (vector_insert (v8i8 (undef)),
7116+
(i32 (vector_extract v8i16:$Vt, VectorIndexH:$idx)), (i64 0))),
7117+
(v8i8 (EXTRACT_SUBREG
7118+
(INSvi16lane
7119+
(v8i16 (IMPLICIT_DEF)),
7120+
(i64 0),
7121+
$Vt,
7122+
(VectorIndexH:$idx)), dsub))>;
7123+
71067124
def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
71077125
(INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
71087126
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
@@ -7242,6 +7260,11 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE
72427260
(INS V128:$src, imm:$Immd,
72437261
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
72447262

7263+
def : Pat<(VT64 (vector_insert (VT64 (undef)),
7264+
(VTScal (vector_extract (VT128 V128:$Rn), (i64 0))),
7265+
(i64 0))),
7266+
(EXTRACT_SUBREG $Rn, dsub)>;
7267+
72457268
def : Pat<(VT64 (vector_insert V64:$src,
72467269
(VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
72477270
(i64 imm:$Immd))),

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,43 +1827,6 @@ let Predicates = [HasSVE] in {
18271827
defm : adrXtwShiftPat<nxv2i64, nxv2i1, 3>;
18281828
} // End HasSVE
18291829

1830-
multiclass SVEVecStoreLanePat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
1831-
ValueType VTy, ValueType STy,
1832-
ValueType SubRegTy,
1833-
SubRegIndex SubRegIdx, Operand IndexType,
1834-
Instruction STR,
1835-
Instruction DUP, AsmVectorIndexOpnd DUPIdxTy> {
1836-
let Predicates = [HasSVE_or_SME] in {
1837-
// Same as Neon VecStoreLane0Pat but without matching VecListOne128.
1838-
def : Pat<(storeop (STy (vector_extract VTy:$Vt, (i64 0))),
1839-
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
1840-
(STR (SubRegTy (EXTRACT_SUBREG $Vt, SubRegIdx)),
1841-
GPR64sp:$Rn, IndexType:$offset)>;
1842-
}
1843-
1844-
// Non-zero immediate index:
1845-
def : Pat<(storeop (STy (vector_extract VTy:$Vt, DUPIdxTy:$idx)),
1846-
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
1847-
(STR (SubRegTy (EXTRACT_SUBREG (DUP $Vt, DUPIdxTy:$idx), SubRegIdx)),
1848-
GPR64sp:$Rn, IndexType:$offset)>;
1849-
}
1850-
1851-
// Note: Types other than i8 are handled in performSTORECombine -- i8 is tricky
1852-
// to handle before ISEL as it is not really a legal type in many places, nor
1853-
// is its equivalently sized FP variant.
1854-
let AddedComplexity = 19 in {
1855-
// Lane 0 truncating stores
1856-
// i32 -> i8
1857-
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv4i32, i32, vi8, bsub, uimm12s4, STRBui, DUP_ZZI_S, sve_elm_idx_extdup_s>;
1858-
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv4i32, i32, vi8, bsub, simm9, STURBi, DUP_ZZI_S, sve_elm_idx_extdup_s>;
1859-
// i64 -> i8
1860-
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv2i64, i64, vi8, bsub, uimm12s4, STRBui, DUP_ZZI_D, sve_elm_idx_extdup_d>;
1861-
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv2i64, i64, vi8, bsub, simm9, STURBi, DUP_ZZI_D, sve_elm_idx_extdup_d>;
1862-
// i8 -> i8 (technically a truncate as the extracted type is i32)
1863-
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv16i8, i32, vi8, bsub, uimm12s4, STRBui, DUP_ZZI_B, sve_elm_idx_extdup_b>;
1864-
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv16i8, i32, vi8, bsub, simm9, STURBi, DUP_ZZI_B, sve_elm_idx_extdup_b>;
1865-
}
1866-
18671830
let Predicates = [HasSVE_or_SME] in {
18681831
defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
18691832

@@ -3245,6 +3208,8 @@ let Predicates = [HasSVE_or_SME] in {
32453208
// Insert scalar into undef[0]
32463209
def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)),
32473210
(INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
3211+
def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i64 FPR64:$src), 0)),
3212+
(INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
32483213
def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)),
32493214
(INSERT_SUBREG (nxv8i16 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
32503215
def : Pat<(nxv4i32 (vector_insert (nxv4i32 (undef)), (i32 FPR32:$src), 0)),

llvm/test/CodeGen/AArch64/aarch64-sve-ldst-one.ll

Lines changed: 69 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s
3-
; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s
4-
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s
2+
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTREAMING
3+
; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
4+
; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
55

66
target triple = "aarch64-unknown-linux-gnu"
77

@@ -106,24 +106,34 @@ entry:
106106
}
107107

108108
define void @test_str_lane_s8(ptr %a, <vscale x 16 x i8> %b) {
109-
; CHECK-LABEL: test_str_lane_s8:
110-
; CHECK: // %bb.0: // %entry
111-
; CHECK-NEXT: mov z0.b, z0.b[7]
112-
; CHECK-NEXT: str b0, [x0]
113-
; CHECK-NEXT: ret
114-
109+
; CHECK-NONSTREAMING-LABEL: test_str_lane_s8:
110+
; CHECK-NONSTREAMING: // %bb.0: // %entry
111+
; CHECK-NONSTREAMING-NEXT: mov v0.b[0], v0.b[7]
112+
; CHECK-NONSTREAMING-NEXT: str b0, [x0]
113+
; CHECK-NONSTREAMING-NEXT: ret
114+
;
115+
; STREAMING-COMPAT-LABEL: test_str_lane_s8:
116+
; STREAMING-COMPAT: // %bb.0: // %entry
117+
; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
118+
; STREAMING-COMPAT-NEXT: str b0, [x0]
119+
; STREAMING-COMPAT-NEXT: ret
115120
entry:
116121
%0 = extractelement <vscale x 16 x i8> %b, i32 7
117122
store i8 %0, ptr %a, align 1
118123
ret void
119124
}
120125

121126
define void @test_str_lane0_s8(ptr %a, <vscale x 16 x i8> %b) {
122-
; CHECK-LABEL: test_str_lane0_s8:
123-
; CHECK: // %bb.0: // %entry
124-
; CHECK-NEXT: str b0, [x0]
125-
; CHECK-NEXT: ret
126-
127+
; CHECK-NONSTREAMING-LABEL: test_str_lane0_s8:
128+
; CHECK-NONSTREAMING: // %bb.0: // %entry
129+
; CHECK-NONSTREAMING-NEXT: mov v0.b[0], v0.b[0]
130+
; CHECK-NONSTREAMING-NEXT: str b0, [x0]
131+
; CHECK-NONSTREAMING-NEXT: ret
132+
;
133+
; STREAMING-COMPAT-LABEL: test_str_lane0_s8:
134+
; STREAMING-COMPAT: // %bb.0: // %entry
135+
; STREAMING-COMPAT-NEXT: str b0, [x0]
136+
; STREAMING-COMPAT-NEXT: ret
127137
entry:
128138
%0 = extractelement <vscale x 16 x i8> %b, i32 0
129139
store i8 %0, ptr %a, align 1
@@ -316,12 +326,17 @@ entry:
316326
}
317327

318328
define void @test_str_lane_s8_negative_offset(ptr %a, <vscale x 16 x i8> %b) {
319-
; CHECK-LABEL: test_str_lane_s8_negative_offset:
320-
; CHECK: // %bb.0: // %entry
321-
; CHECK-NEXT: mov z0.b, z0.b[7]
322-
; CHECK-NEXT: stur b0, [x0, #-8]
323-
; CHECK-NEXT: ret
324-
329+
; CHECK-NONSTREAMING-LABEL: test_str_lane_s8_negative_offset:
330+
; CHECK-NONSTREAMING: // %bb.0: // %entry
331+
; CHECK-NONSTREAMING-NEXT: mov v0.b[0], v0.b[7]
332+
; CHECK-NONSTREAMING-NEXT: stur b0, [x0, #-8]
333+
; CHECK-NONSTREAMING-NEXT: ret
334+
;
335+
; STREAMING-COMPAT-LABEL: test_str_lane_s8_negative_offset:
336+
; STREAMING-COMPAT: // %bb.0: // %entry
337+
; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
338+
; STREAMING-COMPAT-NEXT: stur b0, [x0, #-8]
339+
; STREAMING-COMPAT-NEXT: ret
325340
entry:
326341
%0 = extractelement <vscale x 16 x i8> %b, i32 7
327342
%out_ptr = getelementptr inbounds i8, ptr %a, i64 -8
@@ -330,11 +345,16 @@ entry:
330345
}
331346

332347
define void @test_str_lane0_s8_negative_offset(ptr %a, <vscale x 16 x i8> %b) {
333-
; CHECK-LABEL: test_str_lane0_s8_negative_offset:
334-
; CHECK: // %bb.0: // %entry
335-
; CHECK-NEXT: stur b0, [x0, #-8]
336-
; CHECK-NEXT: ret
337-
348+
; CHECK-NONSTREAMING-LABEL: test_str_lane0_s8_negative_offset:
349+
; CHECK-NONSTREAMING: // %bb.0: // %entry
350+
; CHECK-NONSTREAMING-NEXT: mov v0.b[0], v0.b[0]
351+
; CHECK-NONSTREAMING-NEXT: stur b0, [x0, #-8]
352+
; CHECK-NONSTREAMING-NEXT: ret
353+
;
354+
; STREAMING-COMPAT-LABEL: test_str_lane0_s8_negative_offset:
355+
; STREAMING-COMPAT: // %bb.0: // %entry
356+
; STREAMING-COMPAT-NEXT: stur b0, [x0, #-8]
357+
; STREAMING-COMPAT-NEXT: ret
338358
entry:
339359
%0 = extractelement <vscale x 16 x i8> %b, i32 0
340360
%out_ptr = getelementptr inbounds i8, ptr %a, i64 -8
@@ -398,12 +418,18 @@ entry:
398418

399419

400420
define void @test_str_trunc_lane_s32_to_s8(ptr %a, <vscale x 4 x i32> %b) {
401-
; CHECK-LABEL: test_str_trunc_lane_s32_to_s8:
402-
; CHECK: // %bb.0: // %entry
403-
; CHECK-NEXT: mov z0.s, z0.s[3]
404-
; CHECK-NEXT: str b0, [x0]
405-
; CHECK-NEXT: ret
406-
421+
; CHECK-NONSTREAMING-LABEL: test_str_trunc_lane_s32_to_s8:
422+
; CHECK-NONSTREAMING: // %bb.0: // %entry
423+
; CHECK-NONSTREAMING-NEXT: mov w8, v0.s[3]
424+
; CHECK-NONSTREAMING-NEXT: fmov s0, w8
425+
; CHECK-NONSTREAMING-NEXT: str b0, [x0]
426+
; CHECK-NONSTREAMING-NEXT: ret
427+
;
428+
; STREAMING-COMPAT-LABEL: test_str_trunc_lane_s32_to_s8:
429+
; STREAMING-COMPAT: // %bb.0: // %entry
430+
; STREAMING-COMPAT-NEXT: mov z0.s, z0.s[3]
431+
; STREAMING-COMPAT-NEXT: str b0, [x0]
432+
; STREAMING-COMPAT-NEXT: ret
407433
entry:
408434
%0 = extractelement <vscale x 4 x i32> %b, i32 3
409435
%trunc = trunc i32 %0 to i8
@@ -468,12 +494,18 @@ entry:
468494
}
469495

470496
define void @test_str_trunc_lane_s32_to_s8_negative_offset(ptr %a, <vscale x 4 x i32> %b) {
471-
; CHECK-LABEL: test_str_trunc_lane_s32_to_s8_negative_offset:
472-
; CHECK: // %bb.0: // %entry
473-
; CHECK-NEXT: mov z0.s, z0.s[3]
474-
; CHECK-NEXT: stur b0, [x0, #-8]
475-
; CHECK-NEXT: ret
476-
497+
; CHECK-NONSTREAMING-LABEL: test_str_trunc_lane_s32_to_s8_negative_offset:
498+
; CHECK-NONSTREAMING: // %bb.0: // %entry
499+
; CHECK-NONSTREAMING-NEXT: mov w8, v0.s[3]
500+
; CHECK-NONSTREAMING-NEXT: fmov s0, w8
501+
; CHECK-NONSTREAMING-NEXT: stur b0, [x0, #-8]
502+
; CHECK-NONSTREAMING-NEXT: ret
503+
;
504+
; STREAMING-COMPAT-LABEL: test_str_trunc_lane_s32_to_s8_negative_offset:
505+
; STREAMING-COMPAT: // %bb.0: // %entry
506+
; STREAMING-COMPAT-NEXT: mov z0.s, z0.s[3]
507+
; STREAMING-COMPAT-NEXT: stur b0, [x0, #-8]
508+
; STREAMING-COMPAT-NEXT: ret
477509
entry:
478510
%0 = extractelement <vscale x 4 x i32> %b, i32 3
479511
%trunc = trunc i32 %0 to i8

llvm/test/CodeGen/AArch64/add.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,11 @@ define void @v3i8(ptr %p1, ptr %p2) {
100100
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
101101
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
102102
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
103-
; CHECK-SD-NEXT: umov w8, v0.h[2]
103+
; CHECK-SD-NEXT: mov v0.h[0], v0.h[2]
104104
; CHECK-SD-NEXT: str s1, [sp, #12]
105-
; CHECK-SD-NEXT: ldrh w9, [sp, #12]
106-
; CHECK-SD-NEXT: strb w8, [x0, #2]
107-
; CHECK-SD-NEXT: strh w9, [x0]
105+
; CHECK-SD-NEXT: ldrh w8, [sp, #12]
106+
; CHECK-SD-NEXT: stur b0, [x0, #2]
107+
; CHECK-SD-NEXT: strh w8, [x0]
108108
; CHECK-SD-NEXT: add sp, sp, #16
109109
; CHECK-SD-NEXT: ret
110110
;

llvm/test/CodeGen/AArch64/andorxor.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -292,11 +292,11 @@ define void @and_v3i8(ptr %p1, ptr %p2) {
292292
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
293293
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
294294
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
295-
; CHECK-SD-NEXT: umov w8, v0.h[2]
295+
; CHECK-SD-NEXT: mov v0.h[0], v0.h[2]
296296
; CHECK-SD-NEXT: str s1, [sp, #12]
297-
; CHECK-SD-NEXT: ldrh w9, [sp, #12]
298-
; CHECK-SD-NEXT: strb w8, [x0, #2]
299-
; CHECK-SD-NEXT: strh w9, [x0]
297+
; CHECK-SD-NEXT: ldrh w8, [sp, #12]
298+
; CHECK-SD-NEXT: stur b0, [x0, #2]
299+
; CHECK-SD-NEXT: strh w8, [x0]
300300
; CHECK-SD-NEXT: add sp, sp, #16
301301
; CHECK-SD-NEXT: ret
302302
;
@@ -340,11 +340,11 @@ define void @or_v3i8(ptr %p1, ptr %p2) {
340340
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
341341
; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
342342
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
343-
; CHECK-SD-NEXT: umov w8, v0.h[2]
343+
; CHECK-SD-NEXT: mov v0.h[0], v0.h[2]
344344
; CHECK-SD-NEXT: str s1, [sp, #12]
345-
; CHECK-SD-NEXT: ldrh w9, [sp, #12]
346-
; CHECK-SD-NEXT: strb w8, [x0, #2]
347-
; CHECK-SD-NEXT: strh w9, [x0]
345+
; CHECK-SD-NEXT: ldrh w8, [sp, #12]
346+
; CHECK-SD-NEXT: stur b0, [x0, #2]
347+
; CHECK-SD-NEXT: strh w8, [x0]
348348
; CHECK-SD-NEXT: add sp, sp, #16
349349
; CHECK-SD-NEXT: ret
350350
;
@@ -388,11 +388,11 @@ define void @xor_v3i8(ptr %p1, ptr %p2) {
388388
; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
389389
; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b
390390
; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
391-
; CHECK-SD-NEXT: umov w8, v0.h[2]
391+
; CHECK-SD-NEXT: mov v0.h[0], v0.h[2]
392392
; CHECK-SD-NEXT: str s1, [sp, #12]
393-
; CHECK-SD-NEXT: ldrh w9, [sp, #12]
394-
; CHECK-SD-NEXT: strb w8, [x0, #2]
395-
; CHECK-SD-NEXT: strh w9, [x0]
393+
; CHECK-SD-NEXT: ldrh w8, [sp, #12]
394+
; CHECK-SD-NEXT: stur b0, [x0, #2]
395+
; CHECK-SD-NEXT: strh w8, [x0]
396396
; CHECK-SD-NEXT: add sp, sp, #16
397397
; CHECK-SD-NEXT: ret
398398
;

llvm/test/CodeGen/AArch64/arm64-collect-loh.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,6 @@ define <1 x i8> @getL() {
612612
; CHECK-LABEL: _setL
613613
; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
614614
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE
615-
; CHECK-NEXT: ; kill
616615
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
617616
; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _L@GOTPAGEOFF]
618617
; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:

0 commit comments

Comments
 (0)