|
23 | 23 | from pyspark.sql.functions import avg, lit, sum, year
|
24 | 24 | from pyspark.sql.types import StructType
|
25 | 25 |
|
26 |
| -# BQ_DESTINATION_DATASET_NAME = "expansion_project" |
27 |
| -# BQ_DESTINATION_TABLE_NAME = "ghcnd_stations_joined" |
28 |
| -# BQ_NORMALIZED_TABLE_NAME = "ghcnd_stations_normalized" |
29 |
| -# BQ_PRCP_MEAN_TABLE_NAME = "ghcnd_stations_prcp_mean" |
30 |
| -# BQ_SNOW_MEAN_TABLE_NAME = "ghcnd_stations_snow_mean" |
31 |
| -# BQ_PHX_PRCP_TABLE_NAME = "phx_annual_prcp" |
32 |
| -# BQ_PHX_SNOW_TABLE_NAME = "phx_annual_snow" |
33 | 26 |
|
34 | 27 | if __name__ == "__main__":
|
35 | 28 | # read in the input argument
|
|
49 | 42 | # Output table containing annual snowfall in Phoenix over the past 25 years
|
50 | 43 | PHX_SNOW_WRITE_TABLE = sys.argv[7]
|
51 | 44 |
|
52 |
| - # BUCKET_NAME = "workshop_example_bucket" |
53 |
| - # READ_TABLE = f"{BQ_DESTINATION_DATASET_NAME}.{BQ_DESTINATION_TABLE_NAME}" |
54 |
| - # DF_WRITE_TABLE = f"{BQ_DESTINATION_DATASET_NAME}.{BQ_NORMALIZED_TABLE_NAME}" |
55 |
| - # PRCP_MEAN_WRITE_TABLE = f"{BQ_DESTINATION_DATASET_NAME}.{BQ_PRCP_MEAN_TABLE_NAME}" |
56 |
| - # SNOW_MEAN_WRITE_TABLE = f"{BQ_DESTINATION_DATASET_NAME}.{BQ_SNOW_MEAN_TABLE_NAME}" |
57 |
| - # PHX_PRCP_WRITE_TABLE = f"{BQ_DESTINATION_DATASET_NAME}.{BQ_PHX_PRCP_TABLE_NAME}" |
58 |
| - # PHX_SNOW_WRITE_TABLE = f"{BQ_DESTINATION_DATASET_NAME}.{BQ_PHX_SNOW_TABLE_NAME}" |
59 |
| - |
60 | 45 | # Create a SparkSession, viewable via the Spark UI
|
61 | 46 | spark = SparkSession.builder.appName("data_processing").getOrCreate()
|
62 | 47 | # Load data into dataframe if READ_TABLE exists
|
|
0 commit comments