Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit 3b7f5a6

Browse files
authored
Merge pull request #347 from openclimatefix/trigonometric_time
Include trigonometric time features
2 parents 40f30a6 + 7703590 commit 3b7f5a6

File tree

5 files changed

+119
-2
lines changed

5 files changed

+119
-2
lines changed

ocf_datapipes/batch/batches.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,15 @@ class BatchKey(Enum):
118118
gsp_x_osgb_fourier = auto()
119119
gsp_time_utc_fourier = auto() # (batch_size, time, n_fourier_features)
120120

121+
# -------------- TIME -------------------------------------------
122+
# Sine and cosine of date of year and time of day at every timestep.
123+
# shape = (batch_size, n_timesteps)
124+
# This is calculated for wind only inside datapipes.
125+
wind_date_sin = auto()
126+
wind_date_cos = auto()
127+
wind_time_sin = auto()
128+
wind_time_cos = auto()
129+
121130
# -------------- SUN --------------------------------------------
122131
# Solar position at every timestep. shape = (batch_size, n_timesteps)
123132
# The solar position data comes from two alternative sources: either the Sun pre-prepared

ocf_datapipes/training/windnet.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,11 +213,15 @@ def __iter__(self):
213213
numpy_modalities.append(datapipes_dict["wind"].convert_wind_to_numpy_batch())
214214

215215
logger.debug("Combine all the data sources")
216-
combined_datapipe = MergeNumpyModalities(numpy_modalities).add_sun_position(
216+
logger.debug("Adding trigonometric date and time")
217+
combined_datapipe = MergeNumpyModalities(numpy_modalities).add_trigonometric_date_time(
217218
modality_name="wind"
218219
)
220+
# combined_datapipe = MergeNumpyModalities(numpy_modalities).add_sun_position(
221+
# modality_name="wind"
222+
# )
219223

220-
logger.info("Filtering out samples with no data")
224+
# logger.info("Filtering out samples with no data")
221225
# if self.check_satellite_no_zeros:
222226
# in production we don't want any nans in the satellite data
223227
# combined_datapipe = combined_datapipe.map(check_nans_in_satellite_data)

ocf_datapipes/transform/numpy_batch/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22

33
from .add_fourier_space_time import AddFourierSpaceTimeIterDataPipe as AddFourierSpaceTime
44
from .add_topographic_data import AddTopographicDataIterDataPipe as AddTopographicData
5+
from .datetime_features import AddTrigonometricDateTimeIterDataPipe as AddTrigonometricDateTime
56
from .sun_position import AddSunPositionIterDataPipe as AddSunPosition
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
"""Datapipes to trigonometric date and time to NumpyBatch"""
2+
3+
import numpy as np
4+
from numpy.typing import NDArray
5+
from torch.utils.data import IterDataPipe, functional_datapipe
6+
7+
from ocf_datapipes.batch import BatchKey
8+
9+
10+
def _get_date_time_in_pi(
11+
dt: NDArray[np.datetime64],
12+
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
13+
day_of_year = (dt - dt.astype("datetime64[Y]")).astype(int)
14+
minute_of_day = (dt - dt.astype("datetime64[D]")).astype(int)
15+
16+
# converting into positions on sin-cos circle
17+
time_in_pi = (2 * np.pi) * (minute_of_day / (24 * 3600))
18+
date_in_pi = (2 * np.pi) * (day_of_year / (365 * 24 * 3600))
19+
20+
return date_in_pi, time_in_pi
21+
22+
23+
@functional_datapipe("add_trigonometric_date_time")
24+
class AddTrigonometricDateTimeIterDataPipe(IterDataPipe):
25+
"""Adds the trigonometric encodings of date of year, time of day to the NumpyBatch"""
26+
27+
def __init__(self, source_datapipe: IterDataPipe, modality_name: str):
28+
"""
29+
Adds the sine and cosine of time to the NumpyBatch
30+
31+
Args:
32+
source_datapipe: Datapipe of NumpyBatch
33+
modality_name: Modality to add the time for
34+
"""
35+
self.source_datapipe = source_datapipe
36+
self.modality_name = modality_name
37+
assert self.modality_name in [
38+
"wind",
39+
], f"Trigonometric time not implemented for {self.modality_name}"
40+
41+
def __iter__(self):
42+
for np_batch in self.source_datapipe:
43+
time_utc = np_batch[BatchKey.wind_time_utc]
44+
45+
times: NDArray[np.datetime64] = time_utc.astype("datetime64[s]")
46+
47+
date_in_pi, time_in_pi = _get_date_time_in_pi(times)
48+
49+
# Store
50+
date_sin_batch_key = BatchKey[self.modality_name + "_date_sin"]
51+
date_cos_batch_key = BatchKey[self.modality_name + "_date_cos"]
52+
time_sin_batch_key = BatchKey[self.modality_name + "_time_sin"]
53+
time_cos_batch_key = BatchKey[self.modality_name + "_time_cos"]
54+
55+
np_batch[date_sin_batch_key] = np.sin(date_in_pi)
56+
np_batch[date_cos_batch_key] = np.cos(date_in_pi)
57+
np_batch[time_sin_batch_key] = np.sin(time_in_pi)
58+
np_batch[time_cos_batch_key] = np.cos(time_in_pi)
59+
60+
yield np_batch
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import numpy as np
2+
3+
from ocf_datapipes.transform.numpy_batch.datetime_features import _get_date_time_in_pi
4+
5+
6+
def test_get_date_time_in_pi():
7+
times = np.array(
8+
[
9+
"2020-01-01T00:00:00",
10+
"2020-04-01T06:00:00",
11+
"2020-07-01T12:00:00",
12+
"2020-09-30T18:00:00",
13+
"2020-12-31T23:59:59",
14+
"2021-01-01T00:00:00",
15+
"2021-04-02T06:00:00",
16+
"2021-07-02T12:00:00",
17+
"2021-10-01T18:00:00",
18+
"2021-12-31T23:59:59",
19+
]
20+
).reshape((2, 5))
21+
22+
expected_times_in_pi = np.array([0, 0.5 * np.pi, np.pi, 1.5 * np.pi, 2 * np.pi] * 2).reshape(
23+
(2, 5)
24+
)
25+
26+
times = times.astype("datetime64[s]")
27+
28+
date_in_pi, time_in_pi = _get_date_time_in_pi(times)
29+
30+
# Note on precision: times are compared with tolerance equivalent to 1 second,
31+
# dates are compared with tolerance equivalent to 5 minutes
32+
# None of the data we use has a higher time resolution, so this is a good test of
33+
# whether not accounting for leap years breaks things
34+
assert np.isclose(np.cos(time_in_pi), np.cos(expected_times_in_pi), atol=7.3e-05).all()
35+
assert np.isclose(np.sin(time_in_pi), np.sin(expected_times_in_pi), atol=7.3e-05).all()
36+
assert np.isclose(np.cos(date_in_pi), np.cos(expected_times_in_pi), atol=0.02182).all()
37+
assert np.isclose(np.sin(date_in_pi), np.sin(expected_times_in_pi), atol=0.02182).all()
38+
39+
# 1D array test
40+
assert np.isclose(np.cos(time_in_pi[0]), np.cos(expected_times_in_pi[0]), atol=7.3e-05).all()
41+
assert np.isclose(np.sin(time_in_pi[0]), np.sin(expected_times_in_pi[0]), atol=7.3e-05).all()
42+
assert np.isclose(np.cos(date_in_pi[0]), np.cos(expected_times_in_pi[0]), atol=0.02182).all()
43+
assert np.isclose(np.sin(date_in_pi[0]), np.sin(expected_times_in_pi[0]), atol=0.02182).all()

0 commit comments

Comments
 (0)