
Commit d4d26d4

Authored by leahecole, parthea, and engelke
fix: add fixture for dataproc batch and better test cleanup (#8262)
* fix: add second exception to backoff decorator
* add fixture
* refactor dataproc fixture, remove check for AlreadyExists
* remove duplicate call to fixture
* fix lint
* switch pip version to fix test

Co-authored-by: Anthonios Partheniou <[email protected]>
Co-authored-by: Charles Engelke <[email protected]>
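Note: the first bullet widens the retry decorator to back off on a second exception type, but that change is not visible in the hunks below (the decorator appears only as an unchanged context line). The following is a minimal sketch of the pattern; NotFound is a purely hypothetical stand-in for whichever exception the PR actually added.

# Sketch: backoff.on_exception accepts a tuple of exception classes.
# NotFound below is a hypothetical placeholder; the actual second
# exception added by this PR is not shown in the diff.
import backoff
from google.api_core.exceptions import Aborted, NotFound


@backoff.on_exception(backoff.expo, (Aborted, NotFound), max_tries=3)
def create_batch():
    ...  # call that can transiently raise Aborted or NotFound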
1 parent 292859c commit d4d26d4

File tree

2 files changed (+42, -20 lines)


composer/2022_airflow_summit/data_analytics_process_test.py

Lines changed: 41 additions & 19 deletions
@@ -31,6 +31,8 @@
 # GCP Project
 PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
 TEST_ID = uuid.uuid4()
+DATAPROC_REGION = "us-central1"
+
 
 # Google Cloud Storage constants
 BUCKET_NAME = f"data-analytics-process-test{TEST_ID}"
@@ -44,24 +46,45 @@
 BQ_WRITE_TABLE = f"data-analytics-process-test-normalized-{TEST_ID}".replace("-", "_")
 TABLE_ID = f"{PROJECT_ID}.{BQ_DATASET}.{BQ_READ_TABLE}"
 
-DATAPROC_REGION = "us-central1"
 PYSPARK_JAR = "gs://spark-lib/bigquery/spark-bigquery-latest_2.12.jar"
 PROCESSING_PYTHON_FILE = f"gs://{BUCKET_NAME}/{BUCKET_BLOB}"
 
-BATCH_ID = (
-    f"summit-dag-test-{TEST_ID}"  # Dataproc serverless only allows lowercase characters
-)
-BATCH_CONFIG = {
-    "pyspark_batch": {
-        "jar_file_uris": [PYSPARK_JAR],
-        "main_python_file_uri": PROCESSING_PYTHON_FILE,
-        "args": [
-            PROJECT_ID,
-            f"{BQ_DATASET}.{BQ_READ_TABLE}",
-            f"{BQ_DATASET}.{BQ_WRITE_TABLE}",
-        ],
-    },
-}
+
+@pytest.fixture(scope="module")
+def test_dataproc_batch():
+
+    BATCH_ID = (
+        f"summit-dag-test-{TEST_ID}"  # Dataproc serverless only allows lowercase characters
+    )
+    BATCH_CONFIG = {
+        "pyspark_batch": {
+            "jar_file_uris": [PYSPARK_JAR],
+            "main_python_file_uri": PROCESSING_PYTHON_FILE,
+            "args": [
+                PROJECT_ID,
+                f"{BQ_DATASET}.{BQ_READ_TABLE}",
+                f"{BQ_DATASET}.{BQ_WRITE_TABLE}",
+            ],
+        },
+    }
+
+    yield (BATCH_ID, BATCH_CONFIG)
+    dataproc_client = dataproc.BatchControllerClient(
+        client_options={
+            "api_endpoint": f"{DATAPROC_REGION}-dataproc.googleapis.com:443"
+        }
+    )
+    request = dataproc.DeleteBatchRequest(
+        name=f"projects/{PROJECT_ID}/locations/{DATAPROC_REGION}/batches/{BATCH_ID}"
+    )
+
+    # Make the request
+    response = dataproc_client.delete_batch(request=request)
+
+    # There will only be a response if the deletion fails
+    # otherwise response will be None
+    if response:
+        print(response)
 
 
 @pytest.fixture(scope="module")
@@ -85,7 +108,6 @@ def test_bucket():
     bucket.delete(force=True)
 
 
-# TODO(coleleah): teardown any previous resources
 @pytest.fixture(autouse=True)
 def bq_dataset(test_bucket):
     # Create dataset and table tfor test CSV
@@ -125,7 +147,7 @@ def bq_dataset(test_bucket):
 
 # Retry if we see a flaky 409 "subnet not ready" exception
 @backoff.on_exception(backoff.expo, Aborted, max_tries=3)
-def test_process(test_bucket):
+def test_process(test_dataproc_batch):
     # check that the results table isnt there
     with pytest.raises(NotFound):
         BQ_CLIENT.get_table(f"{BQ_DATASET}.{BQ_WRITE_TABLE}")
@@ -138,8 +160,8 @@ def test_process(test_bucket):
     )
     request = dataproc.CreateBatchRequest(
         parent=f"projects/{PROJECT_ID}/regions/{DATAPROC_REGION}",
-        batch=BATCH_CONFIG,
-        batch_id=BATCH_ID,
+        batch=test_dataproc_batch[1],
+        batch_id=test_dataproc_batch[0],
     )
     # Make the request
     operation = dataproc_client.create_batch(request=request)
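For context, the cleanup this commit adds relies on pytest's yield-fixture teardown: code after the yield runs once every test that used the module-scoped fixture has finished, so the Dataproc batch is deleted even when a test fails. A minimal self-contained sketch of the pattern, with a hypothetical delete_resource standing in for the BatchControllerClient.delete_batch call above:

import pytest


def delete_resource(resource_id):
    # Hypothetical stand-in for dataproc_client.delete_batch(...)
    print(f"cleaning up {resource_id}")


@pytest.fixture(scope="module")
def batch():
    batch_id = "example-batch"
    config = {"pyspark_batch": {}}
    yield (batch_id, config)   # tests receive the (id, config) tuple
    delete_resource(batch_id)  # teardown: runs after the module's last test


def test_uses_batch(batch):
    batch_id, _config = batch
    assert batch_id == "example-batch"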

composer/2022_airflow_summit/noxfile_config.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@
     # If you need to use a specific version of pip,
     # change pip_version_override to the string representation
     # of the version number, for example, "20.2.4"
-    "pip_version_override": "20.2.4",
+    "pip_version_override": "",
     # A dictionary you want to inject into your test. Don't put any
     # secrets here. These values will override predefined values.
     "envs": {"AIRFLOW_HOME": _tmpdir.name},
