Skip to content

Performance increase rolling min max #19549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Feb 14, 2018
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions pandas/_libs/src/headers/cmath
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef _PANDAS_MATH_H_
#define _PANDAS_MATH_H_

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add commets here on why we have this file (so the next person doesn't go thru the same as you :>)

#if defined(_MSC_VER) && (_MSC_VER < 1800)
#include <cmath>
namespace std {
__inline int signbit(double num) { return _copysign(1.0, num) < 0; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would revert this to use math.h - technically extending namespace std is undefined behavior so you'd need to do it a different way

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that's the right direction to go, cmath is what you use when you write C++ code. math.h doesn't work either when compiling in clang. This works in all three compilers even though it's not the most ideal (MSVC doesn't have this defined for older versions which is annoying).

}
#else
#include <cmath>
#endif

#endif
11 changes: 0 additions & 11 deletions pandas/_libs/src/headers/math.h

This file was deleted.

49 changes: 31 additions & 18 deletions pandas/_libs/window.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

cimport cython
from cython cimport Py_ssize_t
from libcpp.deque cimport deque

from libc.stdlib cimport malloc, free

Expand All @@ -12,7 +13,7 @@ from numpy cimport ndarray, double_t, int64_t, float64_t
cnp.import_array()


cdef extern from "../src/headers/math.h":
cdef extern from "../src/headers/cmath" namespace "std":
int signbit(double) nogil
double sqrt(double x) nogil

Expand Down Expand Up @@ -1222,8 +1223,9 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,
cdef:
numeric ai
bint is_variable, should_replace
int64_t s, e, N, i, j, removed
int64_t N, i, removed, window_i
Py_ssize_t nobs = 0
deque Q[int64_t]
ndarray[int64_t] starti, endi
ndarray[numeric, ndim=1] output
cdef:
Expand All @@ -1242,32 +1244,43 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp,

output = np.empty(N, dtype=input.dtype)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a description of the algorithm and a link (if available)

Q = deque[int64_t]()

if is_variable:

with nogil:

for i in range(N):
s = starti[i]
e = endi[i]
for i from starti[0] <= i < endi[0]:
ai = init_mm(input[i], &nobs, is_max)

r = input[s]
nobs = 0
for j in range(s, e):
if is_max:
while not Q.empty() and ai >= input[Q.back()]:
Q.pop_back()
else:
while not Q.empty() and ai <= input[Q.back()]:
Q.pop_back()
Q.push_back(i)

# adds, death at the i offset
ai = init_mm(input[j], &nobs, is_max)
for i from endi[0] <= i < N:
output[i-1] = calc_mm(minp, nobs, input[Q.front()])

if is_max:
if ai > r:
r = ai
else:
if ai < r:
r = ai
ai = init_mm(input[i], &nobs, is_max)

output[i] = calc_mm(minp, nobs, r)
if is_max:
while not Q.empty() and ai >= input[Q.back()]:
Q.pop_back()
else:
while not Q.empty() and ai <= input[Q.back()]:
Q.pop_back()

else:
while not Q.empty() and Q.front() <= i - (endi[i] - starti[i]):
Q.pop_front()

Q.push_back(i)

output[N-1] = calc_mm(minp, nobs, input[Q.front()])

else:
# setup the rings of death!
ring = <numeric *>malloc(win * sizeof(numeric))
death = <int64_t *>malloc(win * sizeof(int64_t))
Expand Down
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,8 @@ def pxd(name):
'pyxfile': '_libs/testing'},
'_libs.window': {
'pyxfile': '_libs/window',
'pxdfiles': ['_libs/skiplist', '_libs/src/util']},
'pxdfiles': ['_libs/skiplist', '_libs/src/util'],
'language': 'c++'},
'_libs.writers': {
'pyxfile': '_libs/writers',
'pxdfiles': ['_libs/src/util']},
Expand All @@ -640,11 +641,11 @@ def pxd(name):
sources=sources,
depends=data.get('depends', []),
include_dirs=include,
language=data.get('language', 'c'),
extra_compile_args=extra_compile_args)

extensions.append(obj)


# ----------------------------------------------------------------------
# msgpack

Expand Down