@@ -1220,6 +1220,13 @@ struct s_MergeState {
1220
1220
* of tuples. It may be set to safe_object_compare, but the idea is that hopefully
1221
1221
* we can assume more, and use one of the special-case compares. */
1222
1222
int (* tuple_elem_compare )(PyObject * , PyObject * , MergeState * );
1223
+
1224
+ /* Used by unsafe_tuple_compare to record whether the very first tuple
1225
+ * elements resolved the last comparison attempt. If so, the next compare
1226
+ * first tries a method that may avoid PyObject_RichCompareBool() entirely.
1227
+ * 0 for false, 1 for true.
1228
+ */
1229
+ int first_tuple_items_resolved_it ;
1223
1230
};
1224
1231
1225
1232
/* binarysort is the best method for sorting small arrays: it does
@@ -2190,7 +2197,24 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms)
2190
2197
* using the same pre-sort check as we use for ms->key_compare,
2191
2198
* but run on the list [x[0] for x in L]. This allows us to optimize compares
2192
2199
* on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is
2193
- * that most tuple compares don't involve x[1:]. */
2200
+ * that most tuple compares don't involve x[1:].
2201
+ * However, that may not be right. When it is right, we can win by calling the
2202
+ * relatively cheap ms->tuple_elem_compare on the first pair of elements, to
2203
+ * see whether v[0] < w[0] or w[0] < v[0]. If either is so, we're done.
2204
+ * Else we proceed as in the tuple compare, comparing the remaining pairs via
2205
+ * the probably more expensive PyObject_RichCompareBool(..., Py_EQ) until (if
2206
+ * ever) that says "no, not equal!". Then, if we're still on the first pair,
2207
+ * ms->tuple_elem_compare can resolve it, else PyObject_RichCompareBool(...,
2208
+ * Py_LT) finishes the job.
2209
+ * In any case, ms->first_tuple_items_resolved_it keeps track of whether the
2210
+ * most recent tuple comparison was resolved by the first pair. If so, the
2211
+ * next attempt starts by trying the cheap tests on the first pair again, else
2212
+ * PyObject_RichCompareBool(..., Py_EQ) is used from the start.
2213
+ * There are cases where PyObject_RichCompareBool(..., Py_EQ) is much cheaper!
2214
+ * For example, that can return "almost immediately" if passed the same
2215
+ * object twice (it special-cases object identity for Py_EQ), which can,
2216
+ * potentially, be unboundedly faster than ms->tuple_elem_compare.
2217
+ */
2194
2218
static int
2195
2219
unsafe_tuple_compare (PyObject * v , PyObject * w , MergeState * ms )
2196
2220
{
@@ -2206,26 +2230,52 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
2206
2230
2207
2231
vt = (PyTupleObject * )v ;
2208
2232
wt = (PyTupleObject * )w ;
2233
+ i = 0 ;
2234
+ if (ms -> first_tuple_items_resolved_it ) {
2235
+ /* See whether fast compares of the first elements settle it. */
2236
+ k = ms -> tuple_elem_compare (vt -> ob_item [0 ], wt -> ob_item [0 ], ms );
2237
+ if (k ) /* error, or v < w */
2238
+ return k ;
2239
+ k = ms -> tuple_elem_compare (wt -> ob_item [0 ], vt -> ob_item [0 ], ms );
2240
+ if (k > 0 ) /* w < v */
2241
+ return 0 ;
2242
+ if (k < 0 ) /* error */
2243
+ return -1 ;
2244
+ /* We have
2245
+ * not (v[0] < w[0]) and not (w[0] < v[0])
2246
+ * which implies, for a total order, that the first elements are
2247
+ * equal. So skip them in the loop.
2248
+ */
2249
+ i = 1 ;
2250
+ ms -> first_tuple_items_resolved_it = 0 ;
2251
+ }
2252
+ /* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0
2253
+ * at the end of the `if` block just above.
2254
+ */
2255
+ assert (! ms -> first_tuple_items_resolved_it );
2209
2256
2210
2257
vlen = Py_SIZE (vt );
2211
2258
wlen = Py_SIZE (wt );
2212
-
2213
- for (i = 0 ; i < vlen && i < wlen ; i ++ ) {
2259
+ for (; i < vlen && i < wlen ; i ++ ) {
2214
2260
k = PyObject_RichCompareBool (vt -> ob_item [i ], wt -> ob_item [i ], Py_EQ );
2261
+ if (!k ) { /* not equal */
2262
+ if (i ) {
2263
+ return PyObject_RichCompareBool (vt -> ob_item [i ], wt -> ob_item [i ],
2264
+ Py_LT );
2265
+ }
2266
+ else {
2267
+ ms -> first_tuple_items_resolved_it = 1 ;
2268
+ return ms -> tuple_elem_compare (vt -> ob_item [0 ], wt -> ob_item [0 ],
2269
+ ms );
2270
+ }
2271
+ }
2215
2272
if (k < 0 )
2216
2273
return -1 ;
2217
- if (!k )
2218
- break ;
2219
2274
}
2275
+ /* all equal until we fell off the end */
2276
+ return vlen < wlen ;
2220
2277
2221
- if (i >= vlen || i >= wlen )
2222
- return vlen < wlen ;
2223
-
2224
- if (i == 0 )
2225
- return ms -> tuple_elem_compare (vt -> ob_item [i ], wt -> ob_item [i ], ms );
2226
- else
2227
- return PyObject_RichCompareBool (vt -> ob_item [i ], wt -> ob_item [i ], Py_LT );
2228
- }
2278
+ }
2229
2279
2230
2280
/* An adaptive, stable, natural mergesort. See listsort.txt.
2231
2281
* Returns Py_None on success, NULL on error. Even in case of error, the
@@ -2408,6 +2458,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
2408
2458
}
2409
2459
2410
2460
ms .key_compare = unsafe_tuple_compare ;
2461
+ ms .first_tuple_items_resolved_it = 1 ; /* be optimistic */
2411
2462
}
2412
2463
}
2413
2464
/* End of pre-sort check: ms is now set properly! */
0 commit comments