@@ -1856,9 +1856,15 @@ static int ir_edge_info_cmp(const void *b1, const void *b2)
1856
1856
if (e1 -> freq != e2 -> freq ) {
1857
1857
return e1 -> freq < e2 -> freq ? 1 : -1 ;
1858
1858
}
1859
- /* In case of equal frequences prefer to keep the existing order */
1859
+ /* In case of equal frequencies, try to avoid penalization of one of the "equal" paths by
1860
+ * preferring the first RPO successor (in conditional branches) and the last RPO predecessor
1861
+ * (in merge points).
1862
+ *
1863
+ * See "Static Basic Block Reordering Heuristics for Implicit Control Flow in Baseline JITs"
1864
+ * Polito Guillermo, Ducasse Stephane, and Tesone Pablo (2021)
1865
+ */
1860
1866
if (e1 -> from != e2 -> from ) {
1861
- return e1 -> from - e2 -> from ;
1867
+ return e2 -> from - e1 -> from ;
1862
1868
} else {
1863
1869
return e1 -> to - e2 -> to ;
1864
1870
}
@@ -2037,7 +2043,7 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
2037
2043
chains [b ].prev = b ;
2038
2044
}
2039
2045
2040
- /* 2. Collect information about BBs and EDGEs freqeuncies */
2046
+ /* 2. Collect information about BBs and EDGEs frequencies */
2041
2047
edges = ir_mem_malloc (sizeof (ir_edge_info ) * max_edges_count );
2042
2048
bb_freq = ir_mem_calloc (ctx -> cfg_blocks_count + 1 , sizeof (float ));
2043
2049
bb_freq [1 ] = 1.0f ;
@@ -2052,8 +2058,8 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
2052
2058
uint32_t * p = ctx -> cfg_edges + bb -> predecessors ;
2053
2059
for (; n > 0 ; p ++ , n -- ) {
2054
2060
uint32_t predecessor = * p ;
2055
- /* Basic Blocks are ordered in a way that usual predecessors ids are less then successors.
2056
- * So we may comapre blocks ids (predecessor < b) instead of a more expensive check for back edge
2061
+ /* Basic Blocks are ordered in a way that the ids of usual (non-back-edge) predecessors are less than the ids of their successors.
2062
+ * So we may compare block ids (predecessor < b) instead of a more expensive check for back edge
2057
2063
* (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b)
2058
2064
*/
2059
2065
if (predecessor < b ) {
@@ -2255,14 +2261,14 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
2255
2261
ir_bitqueue_free (& worklist );
2256
2262
ir_mem_free (visited );
2257
2263
2258
- /* 2. Sort EDGEs according to their frequentcies */
2264
+ /* 2. Sort EDGEs according to their frequencies */
2259
2265
qsort (edges , edges_count , sizeof (ir_edge_info ), ir_edge_info_cmp );
2260
2266
2261
2267
#if IR_DEBUG_BB_SCHEDULE_EDGES
2262
2268
ir_dump_edges (ctx , edges_count , edges );
2263
2269
#endif
2264
2270
2265
- /* 3. Process EDGEs in the decrising frequentcy order and join the connected chains */
2271
+ /* 3. Process EDGEs in decreasing frequency order and join the connected chains */
2266
2272
for (e = edges , i = edges_count ; i > 0 ; e ++ , i -- ) {
2267
2273
uint32_t dst = chains [e -> to ].head ;
2268
2274
if (dst == e -> to ) {
@@ -2341,8 +2347,8 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
2341
2347
#endif
2342
2348
}
2343
2349
2344
- /* 5. Group chains accoring to the most frequnt edge between them */
2345
- // TODO: Try to find a better heuristc
2350
+ /* 5. Group chains according to the most frequent edge between them */
2351
+ // TODO: Try to find a better heuristic
2346
2352
for (e = edges , i = edges_count ; i > 0 ; e ++ , i -- ) {
2347
2353
#if !IR_DEBUG_BB_SCHEDULE_GRAPH
2348
2354
if (!e -> from ) continue ;
@@ -2525,8 +2531,14 @@ int ir_schedule_blocks(ir_ctx *ctx)
2525
2531
{
2526
2532
if (ctx -> cfg_blocks_count <= 2 ) {
2527
2533
return 1 ;
2528
- // TODO: make the choise between top-down and bottom-up algorithm configurable
2529
- } else if (UNEXPECTED (ctx -> flags2 & IR_IRREDUCIBLE_CFG ) || ctx -> cfg_blocks_count > 256 ) {
2534
+ }
2535
+
2536
+ /* The bottom-up Pettis-Hansen algorithm is expensive - O(n^3),
2537
+ * use it only for relatively small functions.
2538
+ *
2539
+ * TODO: make the choice between top-down and bottom-up algorithm configurable
2540
+ */
2541
+ if (UNEXPECTED (ctx -> flags2 & IR_IRREDUCIBLE_CFG ) || ctx -> cfg_blocks_count > 256 ) {
2530
2542
return ir_schedule_blocks_top_down (ctx );
2531
2543
} else {
2532
2544
return ir_schedule_blocks_bottom_up (ctx );
0 commit comments