Skip to content

Commit 26c5e07

Browse files
Steven J. Hill authored and ralfbaechle committed
MIPS: microMIPS: Optimise 'memset' core library function.
Optimise 'memset' to use microMIPS instructions and/or optimisations for binary size reduction. When the microMIPS ISA is not being used, the library function compiles to the original binary code.

Signed-off-by: Steven J. Hill <[email protected]>
1 parent bce8608 commit 26c5e07

File tree

2 files changed

+56
-30
lines changed

2 files changed

+56
-30
lines changed

arch/mips/include/asm/asm.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,7 @@ symbol = value
296296
#define LONG_SUBU subu
297297
#define LONG_L lw
298298
#define LONG_S sw
299+
#define LONG_SP swp
299300
#define LONG_SLL sll
300301
#define LONG_SLLV sllv
301302
#define LONG_SRL srl
@@ -318,6 +319,7 @@ symbol = value
318319
#define LONG_SUBU dsubu
319320
#define LONG_L ld
320321
#define LONG_S sd
322+
#define LONG_SP sdp
321323
#define LONG_SLL dsll
322324
#define LONG_SLLV dsllv
323325
#define LONG_SRL dsrl

arch/mips/lib/memset.S

Lines changed: 54 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,8 @@
55
*
66
* Copyright (C) 1998, 1999, 2000 by Ralf Baechle
77
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
8-
* Copyright (C) 2007 Maciej W. Rozycki
8+
* Copyright (C) 2007 by Maciej W. Rozycki
9+
* Copyright (C) 2011, 2012 MIPS Technologies, Inc.
910
*/
1011
#include <asm/asm.h>
1112
#include <asm/asm-offsets.h>
@@ -19,30 +20,46 @@
1920
#define LONG_S_R sdr
2021
#endif
2122

23+
#ifdef CONFIG_CPU_MICROMIPS
24+
#define STORSIZE (LONGSIZE * 2)
25+
#define STORMASK (STORSIZE - 1)
26+
#define FILL64RG t8
27+
#define FILLPTRG t7
28+
#undef LONG_S
29+
#define LONG_S LONG_SP
30+
#else
31+
#define STORSIZE LONGSIZE
32+
#define STORMASK LONGMASK
33+
#define FILL64RG a1
34+
#define FILLPTRG t0
35+
#endif
36+
2237
#define EX(insn,reg,addr,handler) \
2338
9: insn reg, addr; \
2439
.section __ex_table,"a"; \
2540
PTR 9b, handler; \
2641
.previous
2742

2843
.macro f_fill64 dst, offset, val, fixup
29-
EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup)
30-
EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup)
31-
EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup)
32-
EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup)
33-
EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup)
34-
EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup)
35-
EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup)
36-
EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup)
37-
#if LONGSIZE == 4
38-
EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup)
39-
EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup)
40-
EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
41-
EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
42-
EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
43-
EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
44-
EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
45-
EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
44+
EX(LONG_S, \val, (\offset + 0 * STORSIZE)(\dst), \fixup)
45+
EX(LONG_S, \val, (\offset + 1 * STORSIZE)(\dst), \fixup)
46+
EX(LONG_S, \val, (\offset + 2 * STORSIZE)(\dst), \fixup)
47+
EX(LONG_S, \val, (\offset + 3 * STORSIZE)(\dst), \fixup)
48+
#if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS))
49+
EX(LONG_S, \val, (\offset + 4 * STORSIZE)(\dst), \fixup)
50+
EX(LONG_S, \val, (\offset + 5 * STORSIZE)(\dst), \fixup)
51+
EX(LONG_S, \val, (\offset + 6 * STORSIZE)(\dst), \fixup)
52+
EX(LONG_S, \val, (\offset + 7 * STORSIZE)(\dst), \fixup)
53+
#endif
54+
#if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4))
55+
EX(LONG_S, \val, (\offset + 8 * STORSIZE)(\dst), \fixup)
56+
EX(LONG_S, \val, (\offset + 9 * STORSIZE)(\dst), \fixup)
57+
EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup)
58+
EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup)
59+
EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup)
60+
EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup)
61+
EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup)
62+
EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)
4663
#endif
4764
.endm
4865

@@ -71,16 +88,20 @@ LEAF(memset)
7188
1:
7289

7390
FEXPORT(__bzero)
74-
sltiu t0, a2, LONGSIZE /* very small region? */
91+
sltiu t0, a2, STORSIZE /* very small region? */
7592
bnez t0, .Lsmall_memset
76-
andi t0, a0, LONGMASK /* aligned? */
93+
andi t0, a0, STORMASK /* aligned? */
7794

95+
#ifdef CONFIG_CPU_MICROMIPS
96+
move t8, a1 /* used by 'swp' instruction */
97+
move t9, a1
98+
#endif
7899
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
79100
beqz t0, 1f
80-
PTR_SUBU t0, LONGSIZE /* alignment in bytes */
101+
PTR_SUBU t0, STORSIZE /* alignment in bytes */
81102
#else
82103
.set noat
83-
li AT, LONGSIZE
104+
li AT, STORSIZE
84105
beqz t0, 1f
85106
PTR_SUBU t0, AT /* alignment in bytes */
86107
.set at
@@ -99,24 +120,27 @@ FEXPORT(__bzero)
99120
1: ori t1, a2, 0x3f /* # of full blocks */
100121
xori t1, 0x3f
101122
beqz t1, .Lmemset_partial /* no block to fill */
102-
andi t0, a2, 0x40-LONGSIZE
123+
andi t0, a2, 0x40-STORSIZE
103124

104125
PTR_ADDU t1, a0 /* end address */
105126
.set reorder
106127
1: PTR_ADDIU a0, 64
107128
R10KCBARRIER(0(ra))
108-
f_fill64 a0, -64, a1, .Lfwd_fixup
129+
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup
109130
bne t1, a0, 1b
110131
.set noreorder
111132

112133
.Lmemset_partial:
113134
R10KCBARRIER(0(ra))
114135
PTR_LA t1, 2f /* where to start */
136+
#ifdef CONFIG_CPU_MICROMIPS
137+
LONG_SRL t7, t0, 1
138+
#endif
115139
#if LONGSIZE == 4
116-
PTR_SUBU t1, t0
140+
PTR_SUBU t1, FILLPTRG
117141
#else
118142
.set noat
119-
LONG_SRL AT, t0, 1
143+
LONG_SRL AT, FILLPTRG, 1
120144
PTR_SUBU t1, AT
121145
.set at
122146
#endif
@@ -126,9 +150,9 @@ FEXPORT(__bzero)
126150
.set push
127151
.set noreorder
128152
.set nomacro
129-
f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */
153+
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup /* ... but first do longs ... */
130154
2: .set pop
131-
andi a2, LONGMASK /* At most one long to go */
155+
andi a2, STORMASK /* At most one long to go */
132156

133157
beqz a2, 1f
134158
PTR_ADDU a0, a2 /* What's left */
@@ -169,12 +193,12 @@ FEXPORT(__bzero)
169193

170194
.Lpartial_fixup:
171195
PTR_L t0, TI_TASK($28)
172-
andi a2, LONGMASK
196+
andi a2, STORMASK
173197
LONG_L t0, THREAD_BUADDR(t0)
174198
LONG_ADDU a2, t1
175199
jr ra
176200
LONG_SUBU a2, t0
177201

178202
.Llast_fixup:
179203
jr ra
180-
andi v1, a2, LONGMASK
204+
andi v1, a2, STORMASK

0 commit comments

Comments
 (0)