Skip to content

Commit 80e3f12

Browse files
authored
fixed jvm error (#8360)
1 parent 8a017d0 commit 80e3f12

File tree

1 file changed

+33
-32
lines changed

1 file changed

+33
-32
lines changed

composer/2022_airflow_summit/data_analytics_process_expansion.py

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,38 +23,6 @@
2323
from pyspark.sql import SparkSession
2424
import pyspark.sql.functions as f
2525

26-
# Inverse Distance Weighting (IDW) algorithm
27-
@f.pandas_udf("YEAR integer, VALUE double", f.PandasUDFType.GROUPED_MAP)
28-
def phx_dw_compute(year, df) -> pd.DataFrame:
29-
# This adjusts the rainfall / snowfall in Phoenix for a given year using Inverse Distance Weighting
30-
# based on each weather station's distance to Phoenix. The closer a station is to Phoenix, the higher
31-
# its measurement is weighted.
32-
#
33-
# This function combines the distance equation and inverse distance factor since the distance equation is:
34-
#
35-
# d = sqrt((x1-x2)^2 + (y1-y2)^2)
36-
#
37-
# and the inverse distance factor is:
38-
#
39-
# idf = 1 / d^2
40-
#
41-
# so the square and the square root cancel, which combines this into:
42-
#
43-
# idf = 1 / ((x1-x2)^2 + (y1-y2)^2)
44-
45-
# Latitude and longitude of Phoenix
46-
PHX_LATITUDE = 33.4484
47-
PHX_LONGITUDE = -112.0740
48-
49-
inverse_distance_factors = 1.0 / (
50-
(PHX_LATITUDE - df.LATITUDE) ** 2 + (PHX_LONGITUDE - df.LONGITUDE) ** 2
51-
)
52-
53-
# Calculate each station's weight (normalized so the weights sum to 1)
54-
weights = inverse_distance_factors / inverse_distance_factors.sum()
55-
56-
return pd.DataFrame({"YEAR": year, "VALUE": (weights * df.ANNUAL_AMOUNT).sum()})
57-
5826

5927
if __name__ == "__main__":
6028
# read in the input argument
@@ -133,6 +101,39 @@ def phx_dw_compute(year, df) -> pd.DataFrame:
133101
states_near_phx = ["AZ", "CA", "CO", "NM", "NV", "UT"]
134102
annual_df = df.where(df.STATE.isin(states_near_phx))
135103

104+
# Inverse Distance Weighting (IDW) algorithm
@f.pandas_udf("YEAR integer, VALUE double", f.PandasUDFType.GROUPED_MAP)
def phx_dw_compute(year, df) -> pd.DataFrame:
    """Estimate one group's rainfall / snowfall amount at Phoenix with IDW.

    Each row's ANNUAL_AMOUNT is weighted by the inverse of its squared
    distance to Phoenix, so the closer a station is to Phoenix, the more
    its measurement counts.

    The distance equation is

        d = sqrt((x1 - x2)^2 + (y1 - y2)^2)

    and the inverse distance factor is

        idf = 1 / d^2

    so the square and the square root cancel, leaving

        idf = 1 / ((x1 - x2)^2 + (y1 - y2)^2)

    The distance is a plain Euclidean distance on the raw LATITUDE and
    LONGITUDE values; no great-circle correction is applied.

    Args:
        year: Grouping key handed to the UDF for this group; it is copied
            unchanged into the YEAR column of the result.
        df: pandas DataFrame holding the group's rows. The LATITUDE,
            LONGITUDE and ANNUAL_AMOUNT columns are read.

    Returns:
        A pandas DataFrame with the columns YEAR and VALUE, where VALUE is
        the distance-weighted ANNUAL_AMOUNT for the group.
    """
    # Latitude and longitude of Phoenix
    PHX_LATITUDE = 33.4484
    PHX_LONGITUDE = -112.0740

    squared_distances = (PHX_LATITUDE - df.LATITUDE) ** 2 + (
        PHX_LONGITUDE - df.LONGITUDE
    ) ** 2

    # A station located exactly at the Phoenix coordinates has a squared
    # distance of zero. Dividing by it gives an infinite factor, inf / inf
    # then turns that station's weight into NaN while every other weight
    # becomes 0, and sum() skips the NaN -- so the result would silently
    # collapse to 0.0. Use the co-located measurement(s) directly instead,
    # which is the usual IDW convention for a zero-distance sample.
    at_phoenix = squared_distances == 0
    if at_phoenix.any():
        value = df.ANNUAL_AMOUNT[at_phoenix].mean()
    else:
        inverse_distance_factors = 1.0 / squared_distances

        # Calculate each station's weight (normalized so the weights sum to 1)
        weights = inverse_distance_factors / inverse_distance_factors.sum()
        value = (weights * df.ANNUAL_AMOUNT).sum()

    return pd.DataFrame({"YEAR": year, "VALUE": value})
136+
136137
# Calculate the distance-weighted precipitation amount
137138
phx_annual_prcp_df = (
138139
annual_df.where((annual_df.ELEMENT == "PRCP"))

0 commit comments

Comments
 (0)