-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Center rolling window for time offset #38780
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 38 commits
11c1bb7
3cac891
c73ffc4
58a8ebf
dca9d04
37cb6fa
81e0e4e
463c7f0
321f07c
9270bab
dd33e32
6e4da84
e0966e8
b82f514
18a7b5b
9c4cc58
c27f50e
abaa43b
dc046da
95e3f26
8d582a1
8d5a55c
525cc69
6ac79b9
9bf6ce3
4f98fc5
e5ae3b2
d106940
d4f6d22
c2a7333
73313e6
92f8992
648d2d3
278d33f
5e50f36
c11cf15
2e3f875
f63309b
9f76a41
f05ed61
6fbd080
dff6942
0520e18
a087a6b
5b9b8ff
fca3b4d
6c1c58a
b7e5035
f44c6e6
3dcad64
0f9f6df
315b320
f7d1110
fc88ae4
e07a1f2
cefbb16
edbfd21
bfc0f0d
43e04ed
1e724dc
47a3b14
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,29 +127,94 @@ def test_closed_fixed(closed, arithmetic_win_operators): | |
df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) | ||
df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) | ||
|
||
result = getattr(df_fixed.rolling(2, closed=closed, min_periods=1), func_name)() | ||
expected = getattr(df_time.rolling("2D", closed=closed), func_name)().reset_index( | ||
drop=True | ||
) | ||
result = getattr( | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
df_fixed.rolling(2, closed=closed, min_periods=1, center=False), func_name | ||
)() | ||
expected = getattr( | ||
df_time.rolling("2D", closed=closed, min_periods=1, center=False), func_name | ||
)().reset_index(drop=True) | ||
|
||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_closed_fixed_binary_col(): | ||
def test_datetimelike_centered_selections(closed, arithmetic_win_operators): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you parametrize on closed (e.g. put your window selections directly in the parametrization, alongside the closed parameter)? That is, don't use the fixture. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good idea, this is done, too. |
||
# GH 34315 | ||
func_name = arithmetic_win_operators | ||
df_time = DataFrame( | ||
{"A": [0.0, 1.0, 2.0, 3.0, 4.0]}, index=date_range("2020", periods=5) | ||
) | ||
|
||
if closed == "both": | ||
window_selections = [ | ||
[True, True, False, False, False], | ||
[True, True, True, False, False], | ||
[False, True, True, True, False], | ||
[False, False, True, True, True], | ||
[False, False, False, True, True], | ||
] | ||
elif closed == "left": | ||
window_selections = [ | ||
[True, False, False, False, False], | ||
[True, True, False, False, False], | ||
[False, True, True, False, False], | ||
[False, False, True, True, False], | ||
[False, False, False, True, True], | ||
] | ||
elif closed == "right": | ||
window_selections = [ | ||
[True, True, False, False, False], | ||
[False, True, True, False, False], | ||
[False, False, True, True, False], | ||
[False, False, False, True, True], | ||
[False, False, False, False, True], | ||
] | ||
else: # closed=="neither" | ||
window_selections = [ | ||
[True, False, False, False, False], | ||
[False, True, False, False, False], | ||
[False, False, True, False, False], | ||
[False, False, False, True, False], | ||
[False, False, False, False, True], | ||
] | ||
|
||
expected = DataFrame( | ||
{"A": [getattr(df_time["A"].iloc[s], func_name)() for s in window_selections]}, | ||
index=date_range("2020", periods=5), | ||
) | ||
|
||
if func_name == "sem": | ||
kwargs = {"ddof": 0} | ||
else: | ||
kwargs = {} | ||
|
||
result = getattr( | ||
df_time.rolling("2D", closed=closed, min_periods=1, center=True), func_name | ||
)(**kwargs) | ||
|
||
tm.assert_frame_equal(result, expected, check_dtype=False) | ||
|
||
|
||
def test_closed_fixed_binary_col(center): | ||
# GH 34315 | ||
data = [0, 1, 1, 0, 0, 1, 0, 1] | ||
df = DataFrame( | ||
{"binary_col": data}, | ||
index=date_range(start="2020-01-01", freq="min", periods=len(data)), | ||
) | ||
|
||
rolling = df.rolling(window=len(df), closed="left", min_periods=1) | ||
result = rolling.mean() | ||
if center: | ||
expected_data = [2 / 3, 0.5, 0.4, 0.5, 0.428571, 0.5, 0.571429, 0.5] | ||
else: | ||
expected_data = [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571] | ||
|
||
expected = DataFrame( | ||
[np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], | ||
expected_data, | ||
columns=["binary_col"], | ||
index=date_range(start="2020-01-01", freq="min", periods=len(data)), | ||
index=date_range(start="2020-01-01", freq="min", periods=len(expected_data)), | ||
) | ||
|
||
rolling = df.rolling(window=len(df), closed="left", min_periods=1, center=center) | ||
result = rolling.mean() | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
|
@@ -394,7 +459,7 @@ def test_rolling_datetime(axis_frame, tz_naive_fixture): | |
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_rolling_window_as_string(): | ||
def test_rolling_window_as_string(center): | ||
# see gh-22590 | ||
date_today = datetime.now() | ||
days = date_range(date_today, date_today + timedelta(365), freq="D") | ||
|
@@ -405,50 +470,94 @@ def test_rolling_window_as_string(): | |
df = DataFrame({"DateCol": days, "metric": data}) | ||
|
||
df.set_index("DateCol", inplace=True) | ||
result = df.rolling(window="21D", min_periods=2, closed="left")["metric"].agg("max") | ||
|
||
expData = ( | ||
[np.nan] * 2 | ||
+ [88.0] * 16 | ||
+ [97.0] * 9 | ||
+ [98.0] | ||
+ [99.0] * 21 | ||
+ [95.0] * 16 | ||
+ [93.0] * 5 | ||
+ [89.0] * 5 | ||
+ [96.0] * 21 | ||
+ [94.0] * 14 | ||
+ [90.0] * 13 | ||
+ [88.0] * 2 | ||
+ [90.0] * 9 | ||
+ [96.0] * 21 | ||
+ [95.0] * 6 | ||
+ [91.0] | ||
+ [87.0] * 6 | ||
+ [92.0] * 21 | ||
+ [83.0] * 2 | ||
+ [86.0] * 10 | ||
+ [87.0] * 5 | ||
+ [98.0] * 21 | ||
+ [97.0] * 14 | ||
+ [93.0] * 7 | ||
+ [87.0] * 4 | ||
+ [86.0] * 4 | ||
+ [95.0] * 21 | ||
+ [85.0] * 14 | ||
+ [83.0] * 2 | ||
+ [76.0] * 5 | ||
+ [81.0] * 2 | ||
+ [98.0] * 21 | ||
+ [95.0] * 14 | ||
+ [91.0] * 7 | ||
+ [86.0] | ||
+ [93.0] * 3 | ||
+ [95.0] * 20 | ||
) | ||
result = df.rolling(window="21D", min_periods=2, closed="left", center=center)[ | ||
"metric" | ||
].agg("max") | ||
|
||
if center: | ||
expected_data = ( | ||
[88.0] * 7 | ||
+ [97.0] * 9 | ||
+ [98.0] | ||
+ [99.0] * 21 | ||
+ [95.0] * 16 | ||
+ [93.0] * 5 | ||
+ [89.0] * 5 | ||
+ [96.0] * 21 | ||
+ [94.0] * 14 | ||
+ [90.0] * 13 | ||
+ [88.0] * 2 | ||
+ [90.0] * 9 | ||
+ [96.0] * 21 | ||
+ [95.0] * 6 | ||
+ [91.0] | ||
+ [87.0] * 6 | ||
+ [92.0] * 21 | ||
+ [83.0] * 2 | ||
+ [86.0] * 10 | ||
+ [87.0] * 5 | ||
+ [98.0] * 21 | ||
+ [97.0] * 14 | ||
+ [93.0] * 7 | ||
+ [87.0] * 4 | ||
+ [86.0] * 4 | ||
+ [95.0] * 21 | ||
+ [85.0] * 14 | ||
+ [83.0] * 2 | ||
+ [76.0] * 5 | ||
+ [81.0] * 2 | ||
+ [98.0] * 21 | ||
+ [95.0] * 14 | ||
+ [91.0] * 7 | ||
+ [86.0] | ||
+ [93.0] * 3 | ||
+ [95.0] * 29 | ||
+ [77.0] * 2 | ||
) | ||
|
||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If doing this, please parametrize outside the function. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done! |
||
expected_data = ( | ||
[np.nan] * 2 | ||
+ [88.0] * 16 | ||
+ [97.0] * 9 | ||
+ [98.0] | ||
+ [99.0] * 21 | ||
+ [95.0] * 16 | ||
+ [93.0] * 5 | ||
+ [89.0] * 5 | ||
+ [96.0] * 21 | ||
+ [94.0] * 14 | ||
+ [90.0] * 13 | ||
+ [88.0] * 2 | ||
+ [90.0] * 9 | ||
+ [96.0] * 21 | ||
+ [95.0] * 6 | ||
+ [91.0] | ||
+ [87.0] * 6 | ||
+ [92.0] * 21 | ||
+ [83.0] * 2 | ||
+ [86.0] * 10 | ||
+ [87.0] * 5 | ||
+ [98.0] * 21 | ||
+ [97.0] * 14 | ||
+ [93.0] * 7 | ||
+ [87.0] * 4 | ||
+ [86.0] * 4 | ||
+ [95.0] * 21 | ||
+ [85.0] * 14 | ||
+ [83.0] * 2 | ||
+ [76.0] * 5 | ||
+ [81.0] * 2 | ||
+ [98.0] * 21 | ||
+ [95.0] * 14 | ||
+ [91.0] * 7 | ||
+ [86.0] | ||
+ [93.0] * 3 | ||
+ [95.0] * 20 | ||
) | ||
|
||
expected = Series( | ||
expData, index=days.rename("DateCol")._with_freq(None), name="metric" | ||
expected_data, index=days.rename("DateCol")._with_freq(None), name="metric" | ||
) | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
@@ -887,7 +996,7 @@ def test_rolling_sem(frame_or_series): | |
result = obj.rolling(2, min_periods=1).sem() | ||
if isinstance(result, DataFrame): | ||
result = Series(result[0].values) | ||
expected = Series([np.nan] + [0.707107] * 2) | ||
expected = Series([np.nan] + [0.7071067811865476] * 2) | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.