Skip to content

Commit b05ba8c

Browse files
Issue/blend forecasts (#163)
* first version of blend * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * lint * add adjust_mw to blend * add model for updateing forecast values latest * update blend * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix merge * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent cb9e3f2 commit b05ba8c

File tree

5 files changed

+159
-26
lines changed

5 files changed

+159
-26
lines changed

nowcasting_datamodel/read/blend.py

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ def get_blend_forecast_values_latest(
222222
return forecast_values
223223

224224

225-
def make_weights_df(model_names, weights, start_datetime_now=None):
225+
def make_weights_df(model_names, weights, start_datetime=None):
226226
"""Makes weights to half an hour and blocks
227227
228228
A pd data frame like
@@ -254,17 +254,20 @@ def make_weights_df(model_names, weights, start_datetime_now=None):
254254
},
255255
]
256256
# get time now rounded up to 30 mins
257-
if start_datetime_now is None:
258-
start_datetime_now = datetime.now().replace(second=0, microsecond=0)
259-
if start_datetime_now.minute >= 30:
260-
start_datetime_now += timedelta(hours=1)
261-
start_datetime_now = start_datetime_now.replace(minute=00)
262-
else:
263-
start_datetime_now = start_datetime_now.replace(minute=30)
257+
start_datetime_now = datetime.now(tz=timezone.utc).replace(second=0, microsecond=0)
258+
if start_datetime_now.minute >= 30:
259+
start_datetime_now += timedelta(hours=1)
260+
start_datetime_now = start_datetime_now.replace(minute=00)
261+
else:
262+
start_datetime_now = start_datetime_now.replace(minute=30)
263+
264+
if start_datetime is None:
265+
start_datetime = start_datetime_now
264266

265267
# make dataframe of 8 hours in 30 minutes chunks from now
266268
weights_all_df = []
267-
for weight in weights:
269+
for i in range(len(weights)):
270+
weight = weights[i]
268271
if "start_horizon_hour" not in weight:
269272
start_horizon_hour = 0
270273
else:
@@ -283,41 +286,55 @@ def make_weights_df(model_names, weights, start_datetime_now=None):
283286
else:
284287
end_weight = weight["end_weight"]
285288

289+
# add the first weight to the dataframe, from start_datetime to start_datetime_now.
290+
# This could be for the two days before now
291+
if i == 0:
292+
weights_df = pd.DataFrame(
293+
index=pd.date_range(start=start_datetime, end=start_datetime_now, freq="30min")
294+
)
295+
weights_df[model_names] = start_weight
296+
weights_all_df.append(weights_df)
297+
286298
logger.debug(
287299
f"Making weights for {start_horizon_hour} to {end_horizon_hour} "
288300
f"hours with weights {start_weight} to {end_weight}"
289301
)
290302

291-
start_datetime = (
303+
start_datetime_one_weight = (
292304
start_datetime_now + timedelta(hours=start_horizon_hour) - timedelta(minutes=30)
293305
)
294306
if start_horizon_hour == 0:
295-
start_datetime += timedelta(minutes=30)
307+
start_datetime_one_weight += timedelta(minutes=30)
308+
296309
end_datetime = (
297310
start_datetime_now + timedelta(hours=end_horizon_hour) - timedelta(minutes=30)
298311
)
299312
if end_horizon_hour == 8:
300313
end_datetime += timedelta(minutes=30)
301314

315+
logger.debug(f"Making weights from {start_datetime_one_weight} to {end_datetime}")
302316
weights_df = pd.DataFrame(
303-
index=pd.date_range(start=start_datetime, end=end_datetime, freq="30min")
317+
index=pd.date_range(start=start_datetime_one_weight, end=end_datetime, freq="30min")
304318
)
319+
305320
# get rid of last timestamp
306321
weights_df = weights_df[:-1]
307322

308323
assert len(model_names) == len(start_weight)
309324
assert len(model_names) == len(end_weight)
310325
for model_name, start_weight, end_weight in zip(model_names, start_weight, end_weight):
311326
weights_df[model_name] = start_weight + (end_weight - start_weight) * (
312-
weights_df.index - start_datetime
313-
) / (end_datetime - start_datetime)
327+
weights_df.index - start_datetime_one_weight
328+
) / (end_datetime - start_datetime_one_weight)
314329

315330
weights_all_df.append(weights_df)
316331

317332
weights_all_df = pd.concat(weights_all_df)
318333
weights_all_df = weights_all_df[~weights_all_df.index.duplicated(keep="first")]
319334

320335
# only keep from now
321-
weights_all_df = weights_all_df[weights_all_df.index >= start_datetime_now]
336+
logger.debug(weights_all_df)
337+
logger.debug(start_datetime)
338+
weights_all_df = weights_all_df[weights_all_df.index >= start_datetime]
322339

323340
return weights_all_df

nowcasting_datamodel/read/read.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def get_latest_forecast(
124124
gsp_id: Optional[int] = None,
125125
historic: bool = False,
126126
start_target_time: Optional[datetime] = None,
127+
model_name: Optional[str] = None,
127128
) -> ForecastSQL:
128129
"""
129130
Read forecasts
@@ -135,6 +136,7 @@ def get_latest_forecast(
135136
:param historic: Option to load historic values or not
136137
:param start_target_time:
137138
Filter: forecast values target time should be larger than this datetime
139+
:param model_name: Filter: model name
138140
139141
return: List of forecasts objects from database
140142
"""
@@ -151,6 +153,7 @@ def get_latest_forecast(
151153
start_target_time=start_target_time,
152154
historic=historic,
153155
gsp_ids=gsp_ids,
156+
model_name=model_name,
154157
)
155158

156159
if forecasts is None:
@@ -255,6 +258,7 @@ def get_latest_forecast_for_gsps(
255258
preload_children: Optional[bool] = False,
256259
historic: bool = False,
257260
gsp_ids: List[int] = None,
261+
model_name: Optional[int] = None,
258262
):
259263
"""
260264
Read forecasts
@@ -268,10 +272,13 @@ def get_latest_forecast_for_gsps(
268272
:param preload_children: Option to preload children. This is a speed up, if we need them.
269273
:param historic: Option to load historic values or not
270274
:param gsp_ids: Option to filter on gsps. If None, then only the lastest forecast is loaded.
275+
:param model_name: Option to filter on model name
271276
272277
:return: List of forecasts objects from database
273278
274279
"""
280+
logger.debug(f"Getting latest forecast for gsps {gsp_ids} {historic=} {model_name=}")
281+
275282
order_by_cols = []
276283

277284
# start main query
@@ -292,6 +299,11 @@ def get_latest_forecast_for_gsps(
292299
# filter on historic
293300
query = query.filter(ForecastSQL.historic == historic)
294301

302+
# filter on model name
303+
if model_name is not None:
304+
query = query.join(MLModelSQL)
305+
query = query.filter(MLModelSQL.name == model_name)
306+
295307
# filter on target time
296308
if start_target_time is not None:
297309
query = filter_query_on_target_time(
@@ -677,7 +689,7 @@ def get_all_locations(session: Session, gsp_ids: List[int] = None) -> List[Locat
677689
return locations
678690

679691

680-
def get_model(session: Session, name: str, version: str) -> MLModelSQL:
692+
def get_model(session: Session, name: str, version: Optional[str]) -> MLModelSQL:
681693
"""
682694
Get model object from name and version
683695

nowcasting_datamodel/save/update.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,28 @@
1717
from nowcasting_datamodel.read.read import (
1818
get_latest_forecast,
1919
get_latest_forecast_for_gsps,
20-
get_model,
2120
)
2221

2322
logger = logging.getLogger(__name__)
2423

2524

26-
def get_historic_forecast(session: Session, forecast: ForecastSQL) -> ForecastSQL:
25+
def get_historic_forecast(
26+
session: Session, forecast: ForecastSQL, model_name: Optional[str] = None
27+
) -> ForecastSQL:
2728
"""
2829
Get historic forecast
2930
3031
:param session:
3132
:param gsp_id:
33+
:param model_name: the model name to filter on
3234
:return:
3335
"""
3436

3537
gsp_id = forecast.location.gsp_id
3638

37-
forecast_historic = get_latest_forecast(session=session, gsp_id=gsp_id, historic=True)
39+
forecast_historic = get_latest_forecast(
40+
session=session, gsp_id=gsp_id, historic=True, model_name=model_name
41+
)
3842

3943
if forecast_historic is None:
4044
logger.debug("Could not find a historic forecast, so will make one")
@@ -44,7 +48,7 @@ def get_historic_forecast(session: Session, forecast: ForecastSQL) -> ForecastSQ
4448
forecast_creation_time=datetime.now(timezone.utc),
4549
location=forecast.location,
4650
input_data_last_updated=forecast.input_data_last_updated,
47-
model=get_model(session=session, name="historic", version="all"),
51+
model=forecast.model,
4852
)
4953
session.add(forecast_historic)
5054
session.commit()
@@ -79,7 +83,10 @@ def upsert(session: Session, model, rows: List[dict]):
7983

8084

8185
def update_forecast_latest(
82-
forecast: ForecastSQL, session: Session, forecast_historic: Optional[ForecastSQL] = None
86+
forecast: ForecastSQL,
87+
session: Session,
88+
forecast_historic: Optional[ForecastSQL] = None,
89+
model_name: Optional[str] = None,
8390
):
8491
"""
8592
Update the forecast_values table
@@ -90,12 +97,15 @@ def update_forecast_latest(
9097
3. upsert them (update and or insert)
9198
9299
:param forecast:
100+
:param model_name: the model name to filter on
93101
:return:
94102
"""
95103

96104
# 1. get forecast object
97105
if forecast_historic is None:
98-
forecast_historic = get_historic_forecast(session=session, forecast=forecast)
106+
forecast_historic = get_historic_forecast(
107+
session=session, forecast=forecast, model_name=model_name
108+
)
99109

100110
# 2. create forecast value latest
101111
forecast_values = []
@@ -106,6 +116,7 @@ def update_forecast_latest(
106116
forecast_id=forecast_historic.id,
107117
model_id=forecast_historic.model_id,
108118
)
119+
logger.debug(f"{forecast_historic.model_id=}")
109120
forecast_values.append(forecast_value_latest.__dict__)
110121

111122
# upsert forecast values
@@ -151,6 +162,7 @@ def update_all_forecast_latest(
151162
session: Session,
152163
update_national: Optional[bool] = True,
153164
update_gsp: Optional[bool] = True,
165+
model_name: Optional[str] = None,
154166
):
155167
"""
156168
Update all latest forecasts
@@ -160,6 +172,7 @@ def update_all_forecast_latest(
160172
:param session: sqlalmacy session
161173
:param update_national: Optional (default true), to update the national forecast
162174
:param update_gsp: Optional (default true), to update all the GSP forecasts
175+
:param model_name: Optional (default None), if not None will only update the forecasts
163176
"""
164177

165178
logger.debug("Getting the earliest forecast target time for the first forecast")
@@ -184,6 +197,7 @@ def update_all_forecast_latest(
184197
preload_children=True,
185198
gsp_ids=gsp_ids,
186199
start_target_time=start_target_time,
200+
model_name=model_name,
187201
)
188202
# get all these ids, so we only have to load it once
189203
historic_gsp_ids = [forecast.location.gsp_id for forecast in forecasts_historic_all_gsps]
@@ -220,7 +234,10 @@ def update_all_forecast_latest(
220234
logger.debug(f"Found historic for GSP id {gsp_id}")
221235

222236
update_forecast_latest(
223-
forecast=forecast, session=session, forecast_historic=forecast_historic
237+
forecast=forecast,
238+
session=session,
239+
forecast_historic=forecast_historic,
240+
model_name=model_name,
224241
)
225242
session.commit()
226243

tests/read/test_blend.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
@freeze_time("2023-01-01 00:00:01")
1818
def test_make_weights_df():
19-
start_datetime = datetime(2023, 1, 1, 0, 30)
19+
start_datetime = datetime(2023, 1, 1, 0, 30, tzinfo=timezone.utc)
2020

2121
weights = make_weights_df(model_names=["test_1", "test_2"], weights=None)
2222
assert len(weights) == 16
@@ -45,9 +45,43 @@ def test_make_weights_df():
4545
assert weights["test_2"][15] == 1
4646

4747

48-
#
48+
@freeze_time("2023-01-02 00:00:01")
49+
def test_make_weights_df_yesterday():
50+
start_datetime = datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
4951

52+
weights = make_weights_df(
53+
model_names=["test_1", "test_2"], weights=None, start_datetime=start_datetime
54+
)
55+
assert len(weights) == 49 + 16
56+
assert "test_1" in weights.columns
57+
assert "test_2" in weights.columns
5058

59+
assert weights.index[0].isoformat() == start_datetime.isoformat()
60+
assert (
61+
weights.index[15].isoformat()
62+
== (start_datetime + timedelta(hours=7, minutes=30)).isoformat()
63+
)
64+
65+
assert weights["test_1"][0] == 1
66+
assert weights["test_2"][0] == 0
67+
68+
assert weights["test_1"][49] == 1
69+
assert weights["test_2"][49] == 0
70+
71+
assert weights["test_1"][49 + 3] == 1
72+
assert weights["test_2"][49 + 3] == 0
73+
74+
assert weights["test_1"][49 + 7] == 0.5
75+
assert weights["test_2"][49 + 7] == 0.5
76+
77+
assert weights["test_1"][49 + 11] == 0
78+
assert weights["test_2"][49 + 11] == 1
79+
80+
assert weights["test_1"][49 + 15] == 0
81+
assert weights["test_2"][49 + 15] == 1
82+
83+
84+
@freeze_time("2023-01-01 00:00:01")
5185
def test_get_blend_forecast_values_latest_one_model(db_session):
5286

5387
model = get_model(session=db_session, name="test_1", version="0.0.1")
@@ -86,6 +120,7 @@ def test_get_blend_forecast_values_latest_one_model(db_session):
86120
)
87121

88122

123+
@freeze_time("2023-01-01 00:00:01")
89124
def test_get_blend_forecast_values_latest_two_model_read_one(db_session):
90125

91126
model_1 = get_model(session=db_session, name="test_1", version="0.0.1")
@@ -126,6 +161,7 @@ def test_get_blend_forecast_values_latest_two_model_read_one(db_session):
126161
)
127162

128163

164+
@freeze_time("2023-01-01 00:00:01")
129165
def test_get_blend_forecast_values_latest_two_model_read_two(db_session):
130166

131167
model_1 = get_model(session=db_session, name="test_1", version="0.0.1")
@@ -142,7 +178,7 @@ def test_get_blend_forecast_values_latest_two_model_read_two(db_session):
142178
power = 2
143179
adjust = 100
144180

145-
forecast_horizon_minutes = [0, 30, 7 * 30, 15 * 30]
181+
forecast_horizon_minutes = [0, 30, 8 * 30, 15 * 30]
146182
f1[0].forecast_values_latest = [
147183
ForecastValueLatestSQL(
148184
gsp_id=1,

0 commit comments

Comments
 (0)