 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/uaccess.h>
+#include <linux/cpumask.h>
 #include <asm/cacheflush.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
@@ -36,10 +37,6 @@ int dtrace_getipl(void)
 	return in_interrupt();
 }
 
-static void dtrace_sync_func(void)
-{
-}
-
 void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
 {
 	if (cpu == DTRACE_CPUALL) {
@@ -48,14 +45,133 @@ void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
 		smp_call_function_single(cpu, func, arg, 1);
 }
 
-void dtrace_sync(void)
+void dtrace_toxic_ranges(void (*func)(uintptr_t, uintptr_t))
 {
-	dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL);
+	/* FIXME */
 }
 
-void dtrace_toxic_ranges(void (*func)(uintptr_t, uintptr_t))
+/*
+ * Note: not called from probe context. This function is called
+ * asynchronously (and at a regular interval) from outside of probe context
+ * by the DTrace framework to sync shared data which DTrace probe context
+ * may access without locks.
+ *
+ * Whenever the framework updates data which can be accessed from probe
+ * context, the framework then calls dtrace_sync(). dtrace_sync() guarantees
+ * that all probes are using the new data before returning.
+ *
+ * See the comment in dtrace_impl.h which describes this algorithm.
+ * The cpuc_in_probe_ctxt flag is an increasing 16-bit count. It is odd when
+ * in DTrace probe context and even when not in DTrace probe context.
+ * The upper 15 bits are a counter which is incremented when exiting DTrace
+ * probe context. These upper 15 bits are used to detect "sample aliasing":
+ * i.e. the target CPU is not in DTrace probe context between samples but
+ * continually enters probe context just before being sampled.
+ *
+ * dtrace_sync() loops over NCPUs. CPUs which are not in DTrace probe context
+ * (cpuc_in_probe_ctxt is even) are removed from the list. This is repeated
+ * until there are no CPUs left in the sync list.
+ *
+ * In the rare cases where dtrace_sync() loops over all NCPUs more than
+ * dtrace_sync_sample_count times, dtrace_sync() then spins on one CPU's
+ * cpuc_in_probe_ctxt count until the count increments. This is intended to
+ * avoid sample aliasing.
+ */
+void dtrace_sync(void)
 {
-	/* FIXME */
+	/*
+	 * sync_cpus is a bitmap of CPUs that need to be synced with.
+	 */
+	cpumask_t sync_cpus;
+	uint64_t sample_count = 0;
+	int cpuid, sample_cpuid;
+	int outstanding;
+
+	/*
+	 * Create bitmap of CPUs that need to be synced with.
+	 */
+	cpumask_copy(&sync_cpus, cpu_online_mask);
+	outstanding = 0;
+	for_each_cpu(cpuid, &sync_cpus) {
+		++outstanding;
+
+		/*
+		 * Set a flag to let the CPU know we are syncing with it.
+		 */
+		DTRACE_SYNC_START(cpuid);
+	}
+
+	/*
+	 * The preceding stores by DTRACE_SYNC_START() must complete before
+	 * subsequent loads or stores. No membar is needed because the
+	 * atomic-add operation in DTRACE_SYNC_START is a memory barrier on
+	 * SPARC and X86.
+	 */
+
+	while (outstanding > 0) {
+		/*
+		 * Loop over the map of CPUs that need to be synced with.
+		 */
+		for_each_cpu(cpuid, &sync_cpus) {
+			if (!DTRACE_SYNC_IN_CRITICAL(cpuid)) {
+
+				/* Clear the CPU's sync request flag */
+				DTRACE_SYNC_END(cpuid);
+
+				/*
+				 * Remove cpuid from the list of CPUs that
+				 * still need to be synced with.
+				 */
+				DTRACE_SYNC_DONE(cpuid, &sync_cpus);
+				--outstanding;
+			} else {
+				/*
+				 * Remember one of the outstanding CPUs to spin
+				 * on once we reach the sampling limit.
+				 */
+				sample_cpuid = cpuid;
+			}
+		}
+
+		/*
+		 * dtrace_probe may be running in sibling threads in this core.
+		 */
+		if (outstanding > 0) {
+			dtrace_safe_smt_pause();
+
+			/*
+			 * After sample_count loops, spin on one CPU's count
+			 * instead of just checking for odd/even.
+			 */
+			if (++sample_count > dtrace_sync_sample_count) {
+				uint64_t count =
+				    DTRACE_SYNC_CRITICAL_COUNT(sample_cpuid);
+
+				/*
+				 * Spin until critical section count increments.
+				 */
+				if (DTRACE_SYNC_IN_CRITICAL(sample_cpuid)) {
+					while (count ==
+					    DTRACE_SYNC_CRITICAL_COUNT(
+					    sample_cpuid)) {
+
+						dtrace_safe_smt_pause();
+					}
+				}
+
+				DTRACE_SYNC_END(sample_cpuid);
+				DTRACE_SYNC_DONE(sample_cpuid, &sync_cpus);
+				--outstanding;
+			}
+		}
+	}
+
+	/*
+	 * All preceding loads by DTRACE_SYNC_IN_CRITICAL() and
+	 * DTRACE_SYNC_CRITICAL_COUNT() must complete before subsequent loads
+	 * or stores. No membar is needed because the atomic-add operation in
+	 * DTRACE_SYNC_END() is a memory barrier on SPARC and X86.
+	 */
 }
 
 /*
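Editor's note: the DTRACE_SYNC_START(), DTRACE_SYNC_END(), DTRACE_SYNC_DONE(), DTRACE_SYNC_IN_CRITICAL() and DTRACE_SYNC_CRITICAL_COUNT() macros used above are defined in dtrace_impl.h and are not part of this hunk. The stand-alone sketch below is only an illustration of the odd/even cpuc_in_probe_ctxt encoding described in the dtrace_sync() comment; all names are hypothetical, it is plain userspace C, and it omits the per-CPU plumbing and the atomic/memory-barrier behaviour that the real macros provide.

#include <stdint.h>

/*
 * Illustrative sketch only: a 16-bit per-CPU count whose low bit means
 * "currently in probe context" and whose upper 15 bits advance every time
 * probe context is exited.
 */
struct demo_cpu_core {
	uint16_t cpuc_in_probe_ctxt;	/* odd while in probe context */
};

/* Entering probe context: even -> odd, upper 15 bits unchanged. */
static inline void demo_probe_enter(struct demo_cpu_core *c)
{
	c->cpuc_in_probe_ctxt++;
}

/* Exiting probe context: odd -> even, upper 15 bits advance by one. */
static inline void demo_probe_exit(struct demo_cpu_core *c)
{
	c->cpuc_in_probe_ctxt++;
}

/* Roughly what DTRACE_SYNC_IN_CRITICAL() tests: is the count odd? */
static inline int demo_in_critical(const struct demo_cpu_core *c)
{
	return c->cpuc_in_probe_ctxt & 1;
}

/*
 * Roughly what the spin on DTRACE_SYNC_CRITICAL_COUNT() waits for: any
 * change from a previously sampled count means the CPU has left (and
 * possibly re-entered) probe context since the sample, so it can no longer
 * be referencing data that existed before the sync started.
 */
static inline int demo_has_left_since(const struct demo_cpu_core *c,
				      uint16_t sampled)
{
	return c->cpuc_in_probe_ctxt != sampled;
}

In the real implementation the count is updated with atomic operations, so the update itself doubles as the memory barrier that the in-code comments about DTRACE_SYNC_START()/DTRACE_SYNC_END() rely on.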