Commit 72eb6a7

Merge branch 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (30 commits)
  gameport: use this_cpu_read instead of lookup
  x86: udelay: Use this_cpu_read to avoid address calculation
  x86: Use this_cpu_inc_return for nmi counter
  x86: Replace uses of current_cpu_data with this_cpu ops
  x86: Use this_cpu_ops to optimize code
  vmstat: User per cpu atomics to avoid interrupt disable / enable
  irq_work: Use per cpu atomics instead of regular atomics
  cpuops: Use cmpxchg for xchg to avoid lock semantics
  x86: this_cpu_cmpxchg and this_cpu_xchg operations
  percpu: Generic this_cpu_cmpxchg() and this_cpu_xchg support
  percpu,x86: relocate this_cpu_add_return() and friends
  connector: Use this_cpu operations
  xen: Use this_cpu_inc_return
  taskstats: Use this_cpu_ops
  random: Use this_cpu_inc_return
  fs: Use this_cpu_inc_return in buffer.c
  highmem: Use this_cpu_xx_return() operations
  vmstat: Use this_cpu_inc_return for vm statistics
  x86: Support for this_cpu_add, sub, dec, inc_return
  percpu: Generic support for this_cpu_add, sub, dec, inc_return
  ...

Fixed up conflicts in arch/x86/kernel/{apic/nmi.c, apic/x2apic_uv_x.c, process.c} as per Tejun.
2 parents 23d69b0 + 55ee4ef commit 72eb6a7
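
The theme of the series: replace __get_cpu_var()-style address calculation with this_cpu_* accessors, which x86 emits as a single %gs-segment-prefixed instruction. A minimal sketch of the before/after pattern, assuming kernel context (<linux/percpu.h>); the demo_count variable and both functions are hypothetical examples, not code from this merge:

#include <linux/percpu.h>

/* Hypothetical per-CPU counter, for illustration only. */
static DEFINE_PER_CPU(unsigned long, demo_count);

static unsigned long demo_read_old(void)
{
	/* Old idiom: compute this CPU's address, then dereference it. */
	return __get_cpu_var(demo_count);
}

static unsigned long demo_read_new(void)
{
	/* New idiom: one %gs-relative mov, no address calculation. */
	return __this_cpu_read(demo_count);
}

Besides being faster, the new form shrinks code size, since the segment-prefixed instruction replaces the per-CPU offset arithmetic.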

62 files changed, 703 insertions(+), 275 deletions(-)

MAINTAINERS

Lines changed: 10 additions & 0 deletions
@@ -4653,6 +4653,16 @@ S:	Maintained
 F:	crypto/pcrypt.c
 F:	include/crypto/pcrypt.h
 
+PER-CPU MEMORY ALLOCATOR
+M:	Tejun Heo <[email protected]>
+M:	Christoph Lameter <[email protected]>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
+S:	Maintained
+F:	include/linux/percpu*.h
+F:	mm/percpu*.c
+F:	arch/*/include/asm/percpu.h
+
 PER-TASK DELAY ACCOUNTING
 M:	Balbir Singh <[email protected]>
 S:	Maintained

arch/x86/Kconfig.cpu

Lines changed: 3 additions & 0 deletions
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
 config X86_CMPXCHG
 	def_bool X86_64 || (X86_32 && !M386)
 
+config CMPXCHG_LOCAL
+	def_bool X86_64 || (X86_32 && !M386)
+
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC

arch/x86/include/asm/debugreg.h

Lines changed: 1 addition & 1 deletion
@@ -94,7 +94,7 @@ static inline void hw_breakpoint_disable(void)
 
 static inline int hw_breakpoint_active(void)
 {
-	return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
+	return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
 }
 
 extern void aout_dump_debugregs(struct user *dump);

arch/x86/include/asm/percpu.h

Lines changed: 157 additions & 1 deletion
@@ -229,6 +229,125 @@ do { \
 	}							\
 })
 
+/*
+ * Add return operation
+ */
+#define percpu_add_return_op(var, val)				\
+({								\
+	typeof(var) paro_ret__ = val;				\
+	switch (sizeof(var)) {					\
+	case 1:							\
+		asm("xaddb %0, "__percpu_arg(1)			\
+			    : "+q" (paro_ret__), "+m" (var)	\
+			    : : "memory");			\
+		break;						\
+	case 2:							\
+		asm("xaddw %0, "__percpu_arg(1)			\
+			    : "+r" (paro_ret__), "+m" (var)	\
+			    : : "memory");			\
+		break;						\
+	case 4:							\
+		asm("xaddl %0, "__percpu_arg(1)			\
+			    : "+r" (paro_ret__), "+m" (var)	\
+			    : : "memory");			\
+		break;						\
+	case 8:							\
+		asm("xaddq %0, "__percpu_arg(1)			\
+			    : "+re" (paro_ret__), "+m" (var)	\
+			    : : "memory");			\
+		break;						\
+	default: __bad_percpu_size();				\
+	}							\
+	paro_ret__ += val;					\
+	paro_ret__;						\
+})
+
+/*
+ * xchg is implemented using cmpxchg without a lock prefix. xchg is
+ * expensive due to the implied lock prefix.  The processor cannot prefetch
+ * cachelines if xchg is used.
+ */
+#define percpu_xchg_op(var, nval)				\
+({								\
+	typeof(var) pxo_ret__;					\
+	typeof(var) pxo_new__ = (nval);				\
+	switch (sizeof(var)) {					\
+	case 1:							\
+		asm("\n1:mov "__percpu_arg(1)",%%al"		\
+		    "\n\tcmpxchgb %2, "__percpu_arg(1)		\
+		    "\n\tjnz 1b"				\
+			    : "=a" (pxo_ret__), "+m" (var)	\
+			    : "q" (pxo_new__)			\
+			    : "memory");			\
+		break;						\
+	case 2:							\
+		asm("\n1:mov "__percpu_arg(1)",%%ax"		\
+		    "\n\tcmpxchgw %2, "__percpu_arg(1)		\
+		    "\n\tjnz 1b"				\
+			    : "=a" (pxo_ret__), "+m" (var)	\
+			    : "r" (pxo_new__)			\
+			    : "memory");			\
+		break;						\
+	case 4:							\
+		asm("\n1:mov "__percpu_arg(1)",%%eax"		\
+		    "\n\tcmpxchgl %2, "__percpu_arg(1)		\
+		    "\n\tjnz 1b"				\
+			    : "=a" (pxo_ret__), "+m" (var)	\
+			    : "r" (pxo_new__)			\
+			    : "memory");			\
+		break;						\
+	case 8:							\
+		asm("\n1:mov "__percpu_arg(1)",%%rax"		\
+		    "\n\tcmpxchgq %2, "__percpu_arg(1)		\
+		    "\n\tjnz 1b"				\
+			    : "=a" (pxo_ret__), "+m" (var)	\
+			    : "r" (pxo_new__)			\
+			    : "memory");			\
+		break;						\
+	default: __bad_percpu_size();				\
+	}							\
+	pxo_ret__;						\
+})
+
+/*
+ * cmpxchg has no such implied lock semantics as a result it is much
+ * more efficient for cpu local operations.
+ */
+#define percpu_cmpxchg_op(var, oval, nval)			\
+({								\
+	typeof(var) pco_ret__;					\
+	typeof(var) pco_old__ = (oval);				\
+	typeof(var) pco_new__ = (nval);				\
+	switch (sizeof(var)) {					\
+	case 1:							\
+		asm("cmpxchgb %2, "__percpu_arg(1)		\
+			    : "=a" (pco_ret__), "+m" (var)	\
+			    : "q" (pco_new__), "0" (pco_old__)	\
+			    : "memory");			\
+		break;						\
+	case 2:							\
+		asm("cmpxchgw %2, "__percpu_arg(1)		\
+			    : "=a" (pco_ret__), "+m" (var)	\
+			    : "r" (pco_new__), "0" (pco_old__)	\
+			    : "memory");			\
+		break;						\
+	case 4:							\
+		asm("cmpxchgl %2, "__percpu_arg(1)		\
+			    : "=a" (pco_ret__), "+m" (var)	\
+			    : "r" (pco_new__), "0" (pco_old__)	\
+			    : "memory");			\
+		break;						\
+	case 8:							\
+		asm("cmpxchgq %2, "__percpu_arg(1)		\
+			    : "=a" (pco_ret__), "+m" (var)	\
+			    : "r" (pco_new__), "0" (pco_old__)	\
+			    : "memory");			\
+		break;						\
+	default: __bad_percpu_size();				\
+	}							\
+	pco_ret__;						\
+})
+
 /*
  * percpu_read() makes gcc load the percpu variable every time it is
  * accessed while percpu_read_stable() allows the value to be cached.
@@ -267,6 +386,12 @@ do { \
 #define __this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define __this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
+/*
+ * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
+ * faster than an xchg with forced lock semantics.
+ */
+#define __this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define __this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 
 #define this_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
@@ -286,6 +411,11 @@ do { \
 #define this_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define this_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define this_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
 
 #define irqsafe_cpu_add_1(pcp, val)	percpu_add_op((pcp), val)
 #define irqsafe_cpu_add_2(pcp, val)	percpu_add_op((pcp), val)
@@ -299,6 +429,31 @@ do { \
 #define irqsafe_cpu_xor_1(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_2(pcp, val)	percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_4(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define irqsafe_cpu_xchg_1(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_2(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_4(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_xchg_8(pcp, nval)	percpu_xchg_op(pcp, nval)
+#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+
+#ifndef CONFIG_M386
+#define __this_cpu_add_return_1(pcp, val)	percpu_add_return_op(pcp, val)
+#define __this_cpu_add_return_2(pcp, val)	percpu_add_return_op(pcp, val)
+#define __this_cpu_add_return_4(pcp, val)	percpu_add_return_op(pcp, val)
+#define __this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define __this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val)	percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_2(pcp, val)	percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_4(pcp, val)	percpu_add_return_op(pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+
+#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)
+#endif /* !CONFIG_M386 */
 
 /*
  * Per cpu atomic 64 bit operations are only available under 64 bit.
@@ -311,19 +466,20 @@ do { \
 #define __this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define __this_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
 #define __this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define __this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 
 #define this_cpu_read_8(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
 #define this_cpu_write_8(pcp, val)	percpu_to_op("mov", (pcp), val)
 #define this_cpu_add_8(pcp, val)	percpu_add_op((pcp), val)
 #define this_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define this_cpu_or_8(pcp, val)		percpu_to_op("or", (pcp), val)
 #define this_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
+#define this_cpu_add_return_8(pcp, val)	percpu_add_return_op(pcp, val)
 
 #define irqsafe_cpu_add_8(pcp, val)	percpu_add_op((pcp), val)
 #define irqsafe_cpu_and_8(pcp, val)	percpu_to_op("and", (pcp), val)
 #define irqsafe_cpu_or_8(pcp, val)	percpu_to_op("or", (pcp), val)
 #define irqsafe_cpu_xor_8(pcp, val)	percpu_to_op("xor", (pcp), val)
-
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
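
A short sketch of what the new operations above give callers, assuming kernel context; demo_seq, demo_flag, and both functions are hypothetical examples, not part of this diff:

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, demo_seq);	/* hypothetical */
static DEFINE_PER_CPU(int, demo_flag);	/* hypothetical */

static int demo_next_seq(void)
{
	/* xadd under the covers: add 1 to this CPU's copy, return the new value. */
	return this_cpu_add_return(demo_seq, 1);
}

static int demo_try_claim(void)
{
	/*
	 * cmpxchg without a lock prefix: store 1 only if this CPU's flag
	 * is still 0.  The old value is returned, so 0 means we won.
	 */
	return this_cpu_cmpxchg(demo_flag, 0, 1) == 0;
}

On x86 each of these compiles to a single instruction on the local CPU's copy, which is why the lock prefix can be dropped; atomicity across CPUs is deliberately not provided.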

arch/x86/include/asm/processor.h

Lines changed: 1 addition & 2 deletions
@@ -141,10 +141,9 @@ extern __u32 cpu_caps_set[NCAPINTS];
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 #define cpu_data(cpu)		per_cpu(cpu_info, cpu)
-#define current_cpu_data	__get_cpu_var(cpu_info)
 #else
+#define cpu_info		boot_cpu_data
 #define cpu_data(cpu)		boot_cpu_data
-#define current_cpu_data	boot_cpu_data
 #endif
 
 extern const struct seq_operations cpuinfo_op;
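
With current_cpu_data removed, callers use this_cpu ops on cpu_info directly (the new UP #define keeps that spelling working without CONFIG_SMP). An illustrative fragment of the conversion pattern applied in the file diffs below, assuming kernel context; demo_is_amd is a hypothetical example:

#include <linux/kernel.h>
#include <asm/processor.h>

static bool demo_is_amd(void)
{
	/* was: return current_cpu_data.x86_vendor == X86_VENDOR_AMD; */
	return __this_cpu_read(cpu_info.x86_vendor) == X86_VENDOR_AMD;
}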

arch/x86/kernel/apic/apic.c

Lines changed: 1 addition & 1 deletion
@@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-	if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+	if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 		/* Make LAPIC timer preferrable over percpu HPET */
 		lapic_clockevent.rating = 150;

arch/x86/kernel/apic/io_apic.c

Lines changed: 2 additions & 2 deletions
@@ -2329,7 +2329,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		unsigned int irr;
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
-		irq = __get_cpu_var(vector_irq)[vector];
+		irq = __this_cpu_read(vector_irq[vector]);
 
 		if (irq == -1)
 			continue;
@@ -2363,7 +2363,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
 			goto unlock;
 		}
-		__get_cpu_var(vector_irq)[vector] = -1;
+		__this_cpu_write(vector_irq[vector], -1);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}

arch/x86/kernel/apic/x2apic_uv_x.c

Lines changed: 4 additions & 4 deletions
@@ -120,8 +120,8 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	else if (!strcmp(oem_table_id, "UVX"))
 		uv_system_type = UV_X2APIC;
 	else if (!strcmp(oem_table_id, "UVH")) {
-		__get_cpu_var(x2apic_extra_bits) =
-			pnodeid << uvh_apicid.s.pnode_shift;
+		__this_cpu_write(x2apic_extra_bits,
+				 pnodeid << uvh_apicid.s.pnode_shift);
 		uv_system_type = UV_NON_UNIQUE_APIC;
 		uv_set_apicid_hibit();
 		return 1;
@@ -286,7 +286,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x)
 	unsigned int id;
 
 	WARN_ON(preemptible() && num_online_cpus() > 1);
-	id = x | __get_cpu_var(x2apic_extra_bits);
+	id = x | __this_cpu_read(x2apic_extra_bits);
 
 	return id;
 }
@@ -378,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = {
 
 static __cpuinit void set_x2apic_extra_bits(int pnode)
 {
-	__get_cpu_var(x2apic_extra_bits) = (pnode << 6);
+	__this_cpu_write(x2apic_extra_bits, (pnode << 6));
 }
 
 /*

arch/x86/kernel/cpu/amd.c

Lines changed: 1 addition & 1 deletion
@@ -668,7 +668,7 @@ EXPORT_SYMBOL_GPL(amd_erratum_383);
 
 bool cpu_has_amd_erratum(const int *erratum)
 {
-	struct cpuinfo_x86 *cpu = &current_cpu_data;
+	struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
 	int osvw_id = *erratum++;
 	u32 range;
 	u32 ms;

arch/x86/kernel/cpu/cpufreq/powernow-k8.c

Lines changed: 2 additions & 2 deletions
@@ -521,7 +521,7 @@ static void check_supported_cpu(void *_rc)
 
 	*rc = -ENODEV;
 
-	if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
+	if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
 		return;
 
 	eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
@@ -1377,7 +1377,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
 static void query_values_on_cpu(void *_err)
 {
 	int *err = _err;
-	struct powernow_k8_data *data = __get_cpu_var(powernow_data);
+	struct powernow_k8_data *data = __this_cpu_read(powernow_data);
 
 	*err = query_current_values_with_pending_wait(data);
 }

arch/x86/kernel/cpu/intel_cacheinfo.c

Lines changed: 2 additions & 2 deletions
@@ -265,7 +265,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 		line_size = l2.line_size;
 		lines_per_tag = l2.lines_per_tag;
 		/* cpu_data has errata corrections for K7 applied */
-		size_in_kb = current_cpu_data.x86_cache_size;
+		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
 		break;
 	case 3:
 		if (!l3.val)
@@ -287,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 	eax->split.type = types[leaf];
 	eax->split.level = levels[leaf];
 	eax->split.num_threads_sharing = 0;
-	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
 
 
 	if (assoc == 0xffff)
