26
26
# Inverse Distance Weighting algorithm (DWA)
27
27
@f .pandas_udf ("YEAR integer, VALUE double" , f .PandasUDFType .GROUPED_MAP )
28
28
def phx_dw_compute (year , df ) -> pd .DataFrame :
29
- # This adjusts the rainfall / snowfall in Phoenix for a given year using Inverse Distance Weighting
30
- # based on each weather station's distance to Phoenix. The closer a station is to Phoenix, the higher
31
- # its measurement is weighed.
32
- #
33
- # This function combines the distance equation and inverse distance factor since the distance equation is:
34
- #
35
- # d = sqrt((x1-x2)^2 + (y1-y2)^2))
36
- #
37
- # and the inverse distance factor is:
38
- #
39
- # idf = 1 / d^2
40
- #
41
- # so we negate the square and square root to combine this into:
42
- #
43
- # idf = 1 / ((x1-x2)^2 + (y1-y2)^2))
44
-
45
- # Latitude and longitude of Phoenix
46
- PHX_LATITUDE = 33.4484
47
- PHX_LONGITUDE = - 112.0740
48
-
49
- inverse_distance_factors = 1.0 / \
50
- ((PHX_LATITUDE - df .LATITUDE ) ** 2 +
51
- (PHX_LONGITUDE - df .LONGITUDE ) ** 2 )
52
-
53
- # Calculate each station's weight
54
- weights = inverse_distance_factors / inverse_distance_factors .sum ()
55
-
56
- return pd .DataFrame ({"YEAR" : year , "VALUE" : (weights * df .ANNUAL_AMOUNT ).sum ()})
29
+ # This adjusts the rainfall / snowfall in Phoenix for a given year using Inverse Distance Weighting
30
+ # based on each weather station's distance to Phoenix. The closer a station is to Phoenix, the higher
31
+ # its measurement is weighted.
32
+ #
33
+ # This function combines the distance equation and inverse distance factor since the distance equation is:
34
+ #
35
+ # d = sqrt((x1-x2)^2 + (y1-y2)^2)
36
+ #
37
+ # and the inverse distance factor is:
38
+ #
39
+ # idf = 1 / d^2
40
+ #
41
+ # so the square cancels the square root, and the two combine into:
42
+ #
43
+ # idf = 1 / ((x1-x2)^2 + (y1-y2)^2)
44
+
45
+ # Latitude and longitude of Phoenix
46
+ PHX_LATITUDE = 33.4484
47
+ PHX_LONGITUDE = - 112.0740
48
+
49
+ inverse_distance_factors = 1.0 / (
50
+ (PHX_LATITUDE - df .LATITUDE ) ** 2 + (PHX_LONGITUDE - df .LONGITUDE ) ** 2
51
+ )
52
+
53
+ # Calculate each station's weight
54
+ weights = inverse_distance_factors / inverse_distance_factors .sum ()
55
+
56
+ return pd .DataFrame ({"YEAR" : year , "VALUE" : (weights * df .ANNUAL_AMOUNT ).sum ()})
57
+
57
58
58
59
if __name__ == "__main__" :
59
60
# read in the input argument
@@ -134,9 +135,7 @@ def phx_dw_compute(year, df) -> pd.DataFrame:
134
135
135
136
# Calculate the distance-weighted precipitation amount
136
137
phx_annual_prcp_df = (
137
- annual_df .where (
138
- (annual_df .ELEMENT == "PRCP" )
139
- )
138
+ annual_df .where ((annual_df .ELEMENT == "PRCP" ))
140
139
.groupBy ("ID" , "LATITUDE" , "LONGITUDE" , "YEAR" )
141
140
.agg (f .sum ("VALUE" ).alias ("ANNUAL_AMOUNT" ))
142
141
.groupBy ("YEAR" )
@@ -145,9 +144,7 @@ def phx_dw_compute(year, df) -> pd.DataFrame:
145
144
146
145
# Calculate the distance-weighted snowfall amount
147
146
phx_annual_snow_df = (
148
- annual_df .where (
149
- (annual_df .ELEMENT == "SNOW" )
150
- )
147
+ annual_df .where ((annual_df .ELEMENT == "SNOW" ))
151
148
.groupBy ("ID" , "LATITUDE" , "LONGITUDE" , "YEAR" )
152
149
.agg (f .sum ("VALUE" ).alias ("ANNUAL_AMOUNT" ))
153
150
.groupBy ("YEAR" )
0 commit comments