@@ -176,6 +176,20 @@ pub fn time_ext<T, F>(do_it: bool, sess: Option<&Session>, what: &str, f: F) ->
176
176
profq_msg ( sess, ProfileQueriesMsg :: TimeBegin ( what. to_string ( ) ) )
177
177
}
178
178
}
179
+
180
+ #[ cfg( not( all( windows, parallel_queries, any( target_arch = "x86" , target_arch = "x86_64" ) ) ) ) ]
181
+ let rv = time_impl ( sess, what, f) ;
182
+ #[ cfg( all( windows, parallel_queries, any( target_arch = "x86" , target_arch = "x86_64" ) ) ) ]
183
+ let rv = time_threads_impl ( sess, what, f) ;
184
+
185
+ TIME_DEPTH . with ( |slot| slot. set ( old) ) ;
186
+
187
+ rv
188
+ }
189
+
190
+ fn time_impl < T , F > ( sess : Option < & Session > , what : & str , f : F ) -> T where
191
+ F : FnOnce ( ) -> T ,
192
+ {
179
193
let start = Instant :: now ( ) ;
180
194
let rv = f ( ) ;
181
195
let dur = start. elapsed ( ) ;
@@ -184,12 +198,98 @@ pub fn time_ext<T, F>(do_it: bool, sess: Option<&Session>, what: &str, f: F) ->
184
198
profq_msg ( sess, ProfileQueriesMsg :: TimeEnd )
185
199
}
186
200
}
201
+ print_time_passes_entry_internal ( what, duration_to_secs_str ( dur) ) ;
202
+ rv
203
+ }
187
204
188
- print_time_passes_entry_internal ( what, dur) ;
205
/// Runs `f`, timing it with the CPU time-stamp counter and sampling the
/// per-thread cycle counters of the calling thread and of every thread handle
/// registered in the current rayon registry, then prints one time-passes entry
/// of the form
/// `<secs> - cores <n>x - cpu <n> - threads (<caller> - <worker> <worker> ...)`.
///
/// Every per-thread figure is that thread's cycle delta divided by the elapsed
/// time-stamp-counter delta. When there is no current rayon registry this
/// simply defers to `time_impl`.
///
/// # Panics
///
/// Panics if `QueryPerformanceFrequency` or `QueryThreadCycleTime` does not
/// return `TRUE`.
#[cfg(all(windows, parallel_queries, any(target_arch = "x86", target_arch = "x86_64")))]
fn time_threads_impl<T, F>(sess: Option<&Session>, what: &str, f: F) -> T where
    F: FnOnce() -> T,
{
    use rayon_core::registry;
    use std::iter;
    use winapi;
    use kernel32;

    /// Returns the 64-bit time-stamp counter, assembled from the `edx:eax`
    /// pair produced by `rdtsc`.
    // `low` and `high` are only ever written through the asm outputs.
    #[allow(unused_mut)]
    fn read_counter() -> u64 {
        let mut low: u32;
        let mut high: u32;

        // SAFETY: the template only touches registers; every register that
        // `cpuid`/`rdtsc` write is declared either as an output or a clobber.
        unsafe {
            // `cpuid` (leaf 0) runs before `rdtsc` - presumably as a
            // serializing barrier; confirm against the CPU manual.
            // 32-bit register names are used so the template also assembles on
            // `target_arch = "x86"`, which the enclosing `cfg` admits; the
            // previous `%rax`/`rbx`/`rcx` spelling exists only on x86_64.
            asm!("xor %eax, %eax; cpuid; rdtsc"
                 : "={eax}" (low), "={edx}" (high) :: "memory", "ebx", "ecx");
        }

        ((high as u64) << 32) | (low as u64)
    }

    // Without a worker registry there are no extra threads to sample.
    let registry = match registry::get_current_registry() {
        Some(registry) => registry,
        None => return time_impl(sess, what, f),
    };

    // Divisor used to convert the counter delta into seconds.
    // NOTE(review): this assumes the time-stamp counter ticks at 1000x the
    // performance-counter frequency - confirm on the targeted Windows versions.
    let freq = unsafe {
        // SAFETY: `freq` is a live local that the call may write through.
        let mut freq = 0;
        assert!(kernel32::QueryPerformanceFrequency(&mut freq) == winapi::TRUE);
        freq as u64 * 1000
    };

    // Index 0 is the calling thread; the registry's handles follow. The lock
    // guard is dropped at the end of this block, before `f` runs.
    let threads: Vec<_> = {
        let handles = registry.handles.lock();
        // SAFETY: `GetCurrentThread` takes no arguments.
        let current = unsafe {
            iter::once(kernel32::GetCurrentThread())
        };
        current.chain(handles.iter().map(|t| t.0)).collect()
    };

    let mut begin = vec![0u64; threads.len()];
    let mut end = vec![0u64; threads.len()];

    for (&handle, cycles) in threads.iter().zip(begin.iter_mut()) {
        // SAFETY: `cycles` points at a live element of `begin`.
        unsafe {
            assert!(kernel32::QueryThreadCycleTime(handle, cycles) == winapi::TRUE);
        }
    }

    let time_start = read_counter();
    let result = f();
    let time_end = read_counter();

    for (&handle, cycles) in threads.iter().zip(end.iter_mut()) {
        // SAFETY: `cycles` points at a live element of `end`.
        unsafe {
            assert!(kernel32::QueryThreadCycleTime(handle, cycles) == winapi::TRUE);
        }
    }

    if let Some(sess) = sess {
        if cfg!(debug_assertions) {
            profq_msg(sess, ProfileQueriesMsg::TimeEnd)
        }
    }

    let time = time_end - time_start;
    let time_secs = time as f64 / freq as f64;

    let thread_times: Vec<u64> = end.iter().zip(begin.iter()).map(|(e, b)| *e - *b).collect();
    let total_thread_time: u64 = thread_times.iter().cloned().sum();
    let core_usage = total_thread_time as f64 / time as f64;

    // `thread_times` always holds the calling thread, so `len() - 1` (the
    // number of registry threads) cannot underflow. It can be zero, though;
    // clamp it so the "cpu" figure is not the result of a division by zero.
    let last = thread_times.len() - 1;
    let workers = last.max(1);

    let mut data = format!("{:.6} - cores {:.2}x - cpu {:.2} - threads (",
                           time_secs,
                           core_usage,
                           core_usage / workers as f64);

    for (i, thread_time) in thread_times.into_iter().enumerate() {
        data.push_str(&format!("{:.2}", thread_time as f64 / time as f64));
        if i == 0 {
            // Separates the calling thread from the registry threads.
            data.push_str(" - ");
        } else if i < last {
            data.push_str(" ");
        }
    }

    data.push_str(")");

    print_time_passes_entry_internal(what, data);
    result
}
194
294
195
295
pub fn print_time_passes_entry ( do_it : bool , what : & str , dur : Duration ) {
@@ -203,12 +303,12 @@ pub fn print_time_passes_entry(do_it: bool, what: &str, dur: Duration) {
203
303
r
204
304
} ) ;
205
305
206
- print_time_passes_entry_internal ( what, dur) ;
306
+ print_time_passes_entry_internal ( what, duration_to_secs_str ( dur) ) ;
207
307
208
308
TIME_DEPTH . with ( |slot| slot. set ( old) ) ;
209
309
}
210
310
211
- fn print_time_passes_entry_internal ( what : & str , dur : Duration ) {
311
+ fn print_time_passes_entry_internal ( what : & str , data : String ) {
212
312
let indentation = TIME_DEPTH . with ( |slot| slot. get ( ) ) ;
213
313
214
314
let mem_string = match get_resident ( ) {
@@ -220,7 +320,7 @@ fn print_time_passes_entry_internal(what: &str, dur: Duration) {
220
320
} ;
221
321
println ! ( "{}time: {}{}\t {}" ,
222
322
repeat( " " ) . take( indentation) . collect:: <String >( ) ,
223
- duration_to_secs_str ( dur ) ,
323
+ data ,
224
324
mem_string,
225
325
what) ;
226
326
}
@@ -232,7 +332,7 @@ pub fn duration_to_secs_str(dur: Duration) -> String {
232
332
let secs = dur. as_secs ( ) as f64 +
233
333
dur. subsec_nanos ( ) as f64 / NANOS_PER_SEC ;
234
334
235
- format ! ( "{:.3 }" , secs)
335
+ format ! ( "{:.6 }" , secs)
236
336
}
237
337
238
338
pub fn to_readable_str ( mut val : usize ) -> String {
0 commit comments