Skip to content

Commit ef05b08

Browse files
author
Sjoerd Meijer
committed
[AArch64] Use 64-bit movi for zeroing halfs/floats
This was using the .2d variant, which zeros 128 bits, but using the .2s variant, which zeros 64 bits, is faster on some cores. This is a prep step for D99586 to always use movi for zeroing floats. Differential Revision: https://reviews.llvm.org/D99710
1 parent 94d0fc3 commit ef05b08

File tree

3 files changed

+18
-19
lines changed

3 files changed

+18
-19
lines changed

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,17 +1091,16 @@ void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
10911091
void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
10921092
Register DestReg = MI.getOperand(0).getReg();
10931093
if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
1094-
// Convert H/S/D register to corresponding Q register
1094+
// Convert H/S register to corresponding D register
10951095
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
1096-
DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
1096+
DestReg = AArch64::D0 + (DestReg - AArch64::H0);
10971097
else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31)
1098-
DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
1099-
else {
1098+
DestReg = AArch64::D0 + (DestReg - AArch64::S0);
1099+
else
11001100
assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
1101-
DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
1102-
}
1101+
11031102
MCInst MOVI;
1104-
MOVI.setOpcode(AArch64::MOVIv2d_ns);
1103+
MOVI.setOpcode(AArch64::MOVID);
11051104
MOVI.addOperand(MCOperand::createReg(DestReg));
11061105
MOVI.addOperand(MCOperand::createImm(0));
11071106
EmitToStreamer(*OutStreamer, MOVI);

llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ entry:
2828
; NONE16: fmov d2, xzr
2929
; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0
3030
; ZEROFP-DAG: ldr h0,{{.*}}
31-
; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0
32-
; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0
33-
; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0
34-
; ZERO16: movi v{{[0-3]+}}.2d, #0
35-
; ZERO16: movi v{{[0-3]+}}.2d, #0
36-
; ZERO16: movi v{{[0-3]+}}.2d, #0
37-
; ZERO16: movi v{{[0-3]+}}.2d, #0
31+
; ZEROFP-DAG: movi d1, #0
32+
; ZEROFP-DAG: movi d2, #0
33+
; ZEROFP-DAG: movi v3.2d, #0
34+
; ZERO16: movi d0, #0
35+
; ZERO16: movi d1, #0
36+
; ZERO16: movi d2, #0
37+
; ZERO16: movi v3.2d, #0
3838
tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind
3939
ret void
4040
}
@@ -65,8 +65,8 @@ define void @t4() nounwind ssp {
6565
; ALL-LABEL: t4:
6666
; NONEFP: fmov s{{[0-3]+}}, wzr
6767
; NONEFP: fmov s{{[0-3]+}}, wzr
68-
; ZEROFP: movi v{{[0-3]+}}.2d, #0
69-
; ZEROFP: movi v{{[0-3]+}}.2d, #0
68+
; ZEROFP: movi d0, #0
69+
; ZEROFP: movi d1, #0
7070
tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
7171
ret void
7272
}
@@ -147,15 +147,15 @@ define float @tf32() {
147147
entry:
148148
; ALL-LABEL: tf32:
149149
; NONEFP: mov s0, wzr
150-
; ZEROFP: movi v0.2d, #0
150+
; ZEROFP: movi d0, #0
151151
ret float 0.0
152152
}
153153

154154
define double @td64() {
155155
entry:
156156
; ALL-LABEL: td64:
157157
; NONEFP: mov d0, xzr
158-
; ZEROFP: movi v0.2d, #0
158+
; ZEROFP: movi d0, #0
159159
ret double 0.0
160160
}
161161

llvm/test/CodeGen/AArch64/f16-imm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define half @Const0() {
1111
;
1212
; CHECK-ZCZ-LABEL: Const0:
1313
; CHECK-ZCZ: // %bb.0: // %entry
14-
; CHECK-ZCZ-NEXT: movi v0.2d, #0000000000000000
14+
; CHECK-ZCZ-NEXT: movi d0, #0
1515
; CHECK-ZCZ-NEXT: ret
1616
;
1717
; CHECK-NOFP16-LABEL: Const0:

0 commit comments

Comments
 (0)