
Commit ee70c73

revert last change
1 parent cbabba5 commit ee70c73


1 file changed: 33 additions, 32 deletions

composer/2022_airflow_summit/data_analytics_process_test.py

Lines changed: 33 additions & 32 deletions
@@ -21,7 +21,7 @@
 import uuid

 import backoff
-from google.api_core.exceptions import Aborted, NotFound, AlreadyExists
+from google.api_core.exceptions import Aborted, NotFound
 from google.cloud import bigquery
 from google.cloud import dataproc_v1 as dataproc
 from google.cloud import storage
@@ -50,15 +50,12 @@
 PROCESSING_PYTHON_FILE = f"gs://{BUCKET_NAME}/{BUCKET_BLOB}"


-# Retry if we see a flaky 409 "subnet not ready" exception
-@backoff.on_exception(backoff.expo, Aborted, max_tries=3)
-@pytest.fixture(scope="function")
-def test_dataproc_batch(test_bucket, bq_dataset):
-    # check that the results table isnt there
-    with pytest.raises(NotFound):
-        BQ_CLIENT.get_table(f"{BQ_DATASET}.{BQ_WRITE_TABLE}")
+@pytest.fixture(scope="module")
+def test_dataproc_batch():

-    BATCH_ID = f"summit-dag-test-{TEST_ID}"  # Dataproc serverless only allows lowercase characters
+    BATCH_ID = (
+        f"summit-dag-test-{TEST_ID}"  # Dataproc serverless only allows lowercase characters
+    )
     BATCH_CONFIG = {
         "pyspark_batch": {
             "jar_file_uris": [PYSPARK_JAR],
@@ -71,27 +68,7 @@ def test_dataproc_batch(test_bucket, bq_dataset):
         },
     }

-    # create a batch
-    dataproc_client = dataproc.BatchControllerClient(
-        client_options={
-            "api_endpoint": f"{DATAPROC_REGION}-dataproc.googleapis.com:443"
-        }
-    )
-    request = dataproc.CreateBatchRequest(
-        parent=f"projects/{PROJECT_ID}/regions/{DATAPROC_REGION}",
-        batch=BATCH_CONFIG,
-        batch_id=BATCH_ID,
-    )
-
-    # Make the request
-    operation = dataproc_client.create_batch(request=request)
-
-    print("Waiting for operation to complete...")
-
-    response = operation.result()
-
-    yield response
-
+    yield (BATCH_ID, BATCH_CONFIG)
     dataproc_client = dataproc.BatchControllerClient(
         client_options={
             "api_endpoint": f"{DATAPROC_REGION}-dataproc.googleapis.com:443"
@@ -133,7 +110,7 @@ def test_bucket():
     bucket.delete(force=True)


-@pytest.fixture(scope="module")
+@pytest.fixture(autouse=True)
 def bq_dataset(test_bucket):
     # Create dataset and table tfor test CSV
     BQ_CLIENT.create_dataset(BQ_DATASET)
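The decorator change above switches bq_dataset from an explicitly requested module-scoped fixture to an autouse one, so pytest runs it for every test in the module even when it is not listed as a parameter. A small illustrative sketch of that behavior with hypothetical names:

import pytest


@pytest.fixture(autouse=True)
def dataset():
    # Runs before every test in this module even though no test requests it;
    # code after the yield runs as teardown.
    created = ["example-dataset"]  # stand-in for BQ_CLIENT.create_dataset(...)
    yield created
    created.clear()                # stand-in for dataset cleanup


def test_something():
    # No `dataset` argument, yet the fixture above has already run.
    assert True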
@@ -170,9 +147,33 @@ def bq_dataset(test_bucket):
         print(f"Ignoring NotFound on cleanup, details: {e}")


+# Retry if we see a flaky 409 "subnet not ready" exception
+@backoff.on_exception(backoff.expo, Aborted, max_tries=3)
 def test_process(test_dataproc_batch):
+    # check that the results table isnt there
+    with pytest.raises(NotFound):
+        BQ_CLIENT.get_table(f"{BQ_DATASET}.{BQ_WRITE_TABLE}")
+
+    # create a batch
+    dataproc_client = dataproc.BatchControllerClient(
+        client_options={
+            "api_endpoint": f"{DATAPROC_REGION}-dataproc.googleapis.com:443"
+        }
+    )
+    request = dataproc.CreateBatchRequest(
+        parent=f"projects/{PROJECT_ID}/regions/{DATAPROC_REGION}",
+        batch=test_dataproc_batch[1],
+        batch_id=test_dataproc_batch[0],
+    )
+    # Make the request
+    operation = dataproc_client.create_batch(request=request)
+
+    print("Waiting for operation to complete...")
+
+    response = operation.result()

-    print(test_dataproc_batch)
+    # Handle the response
+    print(response)

     # check that the results table is there now
     assert BQ_CLIENT.get_table(f"{BQ_DATASET}.{BQ_WRITE_TABLE}").num_rows > 0
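The revert also moves the @backoff.on_exception decorator onto test_process itself, so a flaky 409 "subnet not ready" Aborted error re-runs the whole test with exponential backoff, up to three tries. A standalone sketch of that retry pattern; the decorated function is a hypothetical stand-in for the Dataproc create_batch call:

import backoff
from google.api_core.exceptions import Aborted

attempts = {"count": 0}


@backoff.on_exception(backoff.expo, Aborted, max_tries=3)
def create_batch_with_retry():
    # Hypothetical stand-in for dataproc_client.create_batch(...):
    # fails twice with Aborted, then succeeds on the third try.
    attempts["count"] += 1
    if attempts["count"] < 3:
        raise Aborted("409 subnet not ready")
    return "batch created"


print(create_batch_with_retry())  # retried transparently by backoff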
