@@ -1886,7 +1886,7 @@ binarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok)
18861886 return -1 ;
18871887}
18881888
1889- static Py_ssize_t
1889+ static int
18901890abinarysort (MergeState * ms , const sortslice * ss , Py_ssize_t n , Py_ssize_t ok , int adapt )
18911891{
18921892 Py_ssize_t k ; /* for IFLT macro expansion */
@@ -1982,7 +1982,7 @@ abinarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok, in
19821982 }
19831983
19841984 // Update Adaptive runvars
1985- std = L < mu ? mu - L : L - mu ;
1985+ std = labs ( L - mu ) ;
19861986 nbad += std ;
19871987 mu = L + L - last ;
19881988 mu = mu < 0 ? 0 : mu > ok ? ok : mu ;
@@ -2015,7 +2015,7 @@ abinarysort(MergeState *ms, const sortslice *ss, Py_ssize_t n, Py_ssize_t ok, in
20152015 }
20162016
20172017 // Update Adaptive runvars
2018- std = L < mu ? mu - L : L - mu ;
2018+ std = labs ( L - mu ) ;
20192019 nbad += std ;
20202020 mu = L + L - last ;
20212021 mu = mu < 0 ? 0 : mu > ok ? ok : mu ;
@@ -3220,11 +3220,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
32203220 */
32213221 // NOTE: Could turn on based on minlen or comparison type
32223222 int binary_adapt = ms .listlen >= 100 ;
3223- if (binary_adapt ) {
3224- int adapt = 0 ; // do not run binarysort adaptivity on 1st run
3225- Py_ssize_t cs = 0 ; // but do check goodness of adaptive fit
3226- Py_ssize_t cd = 1 ;
3227- Py_ssize_t abinres ;
3223+ if (!binary_adapt ) {
32283224 do {
32293225 /* Identify next run. */
32303226 Py_ssize_t n ;
@@ -3236,31 +3232,10 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
32363232 if (n < minrun ) {
32373233 const Py_ssize_t force = nremaining <= minrun ?
32383234 nremaining : minrun ;
3239- if (cs ) {
3240- if (binarysort (& ms , & lo , force , n ) < 0 )
3241- goto fail ;
3242- cs -= 1 ;
3243- }
3244- else {
3245- abinres = abinarysort (& ms , & lo , force , n , adapt );
3246- if (abinres < 0 )
3247- goto fail ;
3248- adapt = abinres < 250 ;
3249- if (adapt )
3250- cd = 1 ;
3251- else if (cd >= 9 )
3252- cs = cd = 11 ;
3253- else
3254- cs = cd = cd + 2 ;
3255- }
3235+ if (binarysort (& ms , & lo , force , n ) < 0 )
3236+ goto fail ;
32563237 n = force ;
32573238 }
3258- else {
3259- // After long monotonic run start adapting immediately
3260- adapt = 1 ;
3261- cs = 0 ;
3262- cd = 1 ;
3263- }
32643239 /* Maybe merge pending runs. */
32653240 assert (ms .n == 0 || ms .pending [ms .n - 1 ].base .keys +
32663241 ms .pending [ms .n - 1 ].len == lo .keys );
@@ -3277,6 +3252,16 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
32773252 } while (nremaining );
32783253 }
32793254 else {
3255+ // NOTE:WIP: Only 1% out of 6% worst case is due to
3256+ // extra calculations in simple binary sort
3257+ // removing big branch in `abinarysort` also has no effect
3258+ // this has something to do with higher level branch prediction
3259+ // doing if (0) removes only 1% extra == 2%
3260+ // and commenting out code still 2% slower...???
3261+ int adapt = 0 ; // do not run binarysort adaptivity on 1st run
3262+ int cs = 0 ; // but do check goodness of adaptive fit
3263+ int cd = 1 ;
3264+ int abinret ;
32803265 do {
32813266 /* Identify next run. */
32823267 Py_ssize_t n ;
@@ -3288,10 +3273,31 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
32883273 if (n < minrun ) {
32893274 const Py_ssize_t force = nremaining <= minrun ?
32903275 nremaining : minrun ;
3291- if (binarysort (& ms , & lo , force , n ) < 0 )
3292- goto fail ;
3276+ if (cs ) {
3277+ if (binarysort (& ms , & lo , force , n ) < 0 )
3278+ goto fail ;
3279+ cs -= 1 ;
3280+ }
3281+ else {
3282+ abinret = abinarysort (& ms , & lo , force , n , adapt );
3283+ if (abinret < 0 )
3284+ goto fail ;
3285+ adapt = abinret < 250 ;
3286+ if (adapt )
3287+ cd = 1 ;
3288+ else if (cd >= 9 )
3289+ cs = cd = 11 ;
3290+ else
3291+ cs = cd = cd + 2 ;
3292+ }
32933293 n = force ;
32943294 }
3295+ else {
3296+ // After long monotonic run start adapting immediately
3297+ adapt = 1 ;
3298+ cs = 0 ;
3299+ cd = 1 ;
3300+ }
32953301 /* Maybe merge pending runs. */
32963302 assert (ms .n == 0 || ms .pending [ms .n - 1 ].base .keys +
32973303 ms .pending [ms .n - 1 ].len == lo .keys );
0 commit comments