from airflow import models
from airflow.kubernetes.secret import Secret
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import (
    KubernetesPodOperator,
)
from kubernetes.client import models as k8s_models

# A Secret is an object that contains a small amount of sensitive data such as
# a password, a token, or a key. Such information might otherwise be put in a
# Pod specification or in an image; putting it in a Secret object allows for
# more control over how it is used, and reduces the risk of accidental
# exposure.

# [START composer_kubernetespodoperator_secretobject]
secret_env = Secret(
    # Expose the secret as environment variable.
    deploy_type="env",
    # The name of the environment variable, since deploy_type is `env` rather
    # than `volume`.
    deploy_target="SQL_CONN",
    # Name of the Kubernetes Secret
    secret="airflow-secrets",
    # Key of a secret stored in this Secret object
    key="sql_alchemy_conn",
)
secret_volume = Secret(
    deploy_type="volume",
    # Path where we mount the secret as volume
    deploy_target="/var/secrets/google",
    # Name of Kubernetes Secret
    secret="service-account",
    # Key in the form of service account file name
    key="service-account.json",
)
# [END composer_kubernetespodoperator_secretobject]
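
# The Secret objects above only reference Kubernetes Secrets; they do not
# create them. Assuming you manage Secrets with kubectl, the two Secrets
# referenced here could be created with commands like the following (the
# literal values are placeholders):
#
#   kubectl create secret generic airflow-secrets \
#       --from-literal sql_alchemy_conn=test_value
#   kubectl create secret generic service-account \
#       --from-file service-account.json=/path/to/service-account.json
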
# If you are running Airflow in more than one time zone
# see https://airflow.apache.org/docs/apache-airflow/stable/timezone.html
# for best practices
YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

# If a Pod fails to launch, or has an error occur in the container, Airflow
# will show the task as failed, as well as contain all of the task logs
# required to debug.
with models.DAG(
    dag_id="composer_sample_kubernetes_pod",
    schedule_interval=datetime.timedelta(days=1),
    start_date=YESTERDAY,
) as dag:
    # Only name, namespace, image, and task_id are required to create a
    # KubernetesPodOperator. In Cloud Composer, currently the operator defaults
    # to using the config file found at `/home/airflow/composer_kube_config` if
    # no `config_file` parameter is specified. By default it will contain the
    # credentials for Cloud Composer's Google Kubernetes Engine cluster that
    # is created upon environment creation.
    # [START composer_kubernetespodoperator_minconfig]
    kubernetes_min_pod = KubernetesPodOperator(
        # The ID specified for the task.
        task_id="pod-ex-minimum",
        # Name of task you want to run, used to generate Pod ID.
        name="pod-ex-minimum",
        # Entrypoint of the container, if not specified the Docker container's
        # entrypoint is used. The cmds parameter is templated.
        cmds=["echo"],
        # The namespace to run within Kubernetes, default namespace is
        # `default`. In Composer 1 there is the potential for
        # the resource starvation of Airflow workers and scheduler
        # within the Cloud Composer environment,
        # the recommended solution is to increase the amount of nodes in order
        # to satisfy the computing requirements. Alternatively, launching pods
        # into a custom namespace will avoid competing over resources,
        # and using Composer 2 will mean the environment autoscales.
        namespace="default",
        # Docker image specified. Defaults to hub.docker.com, but any fully
        # qualified URLs will point to a custom repository. Supports private
        # gcr.io images if the Composer Environment is under the same
        # project-id as the gcr.io images and the service account that Composer
        # uses has permission to access the Google Container Registry
        # (the default service account has permission)
        image="gcr.io/gcp-runtimes/ubuntu_18_0_4",
    )
    # [END composer_kubernetespodoperator_minconfig]
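    # A single task such as the one above can be exercised outside of the
    # scheduler with the Airflow 2 CLI, assuming the DAG file is already
    # deployed to the environment; the date argument is a placeholder:
    #
    #   airflow tasks test composer_sample_kubernetes_pod pod-ex-minimum 2022-01-01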
    # [START composer_kubernetespodoperator_templateconfig]
    kubernetes_template_ex = KubernetesPodOperator(
        task_id="ex-kube-templates",
        name="ex-kube-templates",
        namespace="default",
        image="bash",
        # All parameters below are able to be templated with jinja -- cmds,
        # arguments, env_vars, and config_file. For more information visit:
        # https://airflow.apache.org/docs/apache-airflow/stable/macros-ref.html
        # Entrypoint of the container, if not specified the Docker container's
        # entrypoint is used. The cmds parameter is templated.
        cmds=["echo"],
        # `ds` in jinja is the execution date as YYYY-MM-DD; this docker image
        # will echo the execution date. Arguments to the entrypoint. The docker
        # image's CMD is used if this is not provided. The arguments parameter
        # is templated.
        arguments=["{{ ds }}"],
        # The var template variable allows you to access variables defined in
        # Airflow UI. In this case we are getting the value of my_value and
        # setting the environment variable `MY_VALUE`. The pod will fail if
        # `my_value` is not set in the Airflow UI.
        env_vars={"MY_VALUE": "{{ var.value.my_value }}"},
        # Sets the config file to a kubernetes config file specified in
        # airflow.cfg. If the configuration file does not exist or does
        # not provide valid credentials the pod will fail to launch. If not
        # specified, config_file defaults to ~/.kube/config
        config_file="{{ conf.get('core', 'kube_config') }}",
    )
    # [END composer_kubernetespodoperator_templateconfig]
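    # The `my_value` Airflow variable used above must exist before the task
    # runs; it can be created in the Airflow UI (Admin > Variables) or, for
    # example, with the CLI (the value is a placeholder):
    #
    #   airflow variables set my_value example_value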
    # [START composer_kubernetespodoperator_secretconfig]
    kubernetes_secret_vars_ex = KubernetesPodOperator(
        task_id="ex-kube-secrets",
        name="ex-kube-secrets",
        namespace="default",
        image="ubuntu",
        startup_timeout_seconds=300,
        # The secrets to pass to the Pod; the Pod will fail to create if the
        # secrets you specify in a Secret object do not exist in Kubernetes.
        secrets=[secret_env, secret_volume],
        # env_vars allows you to specify environment variables for your
        # container to use. env_vars is templated.
        env_vars={
            "EXAMPLE_VAR": "/example/value",
            "GOOGLE_APPLICATION_CREDENTIALS": "/var/secrets/google/service-account.json",
        },
    )
    # [END composer_kubernetespodoperator_secretconfig]
    # [START composer_kubernetespodaffinity]
    # Pod affinity with the KubernetesPodOperator
    # is not supported with Composer 2;
    # instead, create a cluster and use the GKEStartPodOperator:
    # https://cloud.google.com/composer/docs/using-gke-operator
    kubernetes_affinity_ex = KubernetesPodOperator(
        task_id="ex-pod-affinity",
        name="ex-pod-affinity",
        namespace="default",
        image="perl:5.34.0",
        cmds=["perl"],
        arguments=["-Mbignum=bpi", "-wle", "print bpi(2000)"],
        # affinity allows you to constrain which nodes your pod is eligible to
        # be scheduled on, based on labels on the node. In this case, if the
        # label 'cloud.google.com/gke-nodepool' with value
        # 'pool-0' or 'pool-1' is not found on any
        # nodes, it will fail to schedule.
        affinity={
            "nodeAffinity": {
                # requiredDuringSchedulingIgnoredDuringExecution means in order
                # for a pod to be scheduled on a node, the node must have the
                # specified labels. However, if labels on a node change at
                # runtime such that the affinity rules on a pod are no longer
                # met, the pod will still continue to run on the node.
                "requiredDuringSchedulingIgnoredDuringExecution": {
                    "nodeSelectorTerms": [
                        {
                            "matchExpressions": [
                                {
                                    # When nodepools are created in Google Kubernetes
                                    # Engine, the nodes inside of that nodepool are
                                    # automatically assigned the label
                                    # 'cloud.google.com/gke-nodepool' with the value of
                                    # the nodepool's name.
                                    "key": "cloud.google.com/gke-nodepool",
                                    "operator": "In",
                                    # The label key's values that pods can be scheduled
                                    # on.
                                    "values": [
                                        "pool-0",
                                        "pool-1",
                                    ],
                                }
                            ]
                        }
                    ]
                }
            }
        },
    )
    # [END composer_kubernetespodaffinity]
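    # A minimal sketch of the Composer 2 alternative mentioned above: create
    # your own GKE cluster and run the pod there with GKEStartPodOperator
    # from the Google provider. The project, location, and cluster names are
    # placeholder assumptions:
    #
    # from airflow.providers.google.cloud.operators.kubernetes_engine import (
    #     GKEStartPodOperator,
    # )
    #
    # gke_affinity_ex = GKEStartPodOperator(
    #     task_id="ex-gke-pod-affinity",
    #     name="ex-gke-pod-affinity",
    #     project_id="my-project-id",
    #     location="us-central1",
    #     cluster_name="my-gke-cluster",
    #     namespace="default",
    #     image="perl:5.34.0",
    #     cmds=["perl"],
    #     arguments=["-Mbignum=bpi", "-wle", "print bpi(2000)"],
    #     # The same affinity dict shown above can be passed here.
    #     affinity={},
    # )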
    # [START composer_kubernetespodoperator_fullconfig]
    kubernetes_full_pod = KubernetesPodOperator(
        task_id="ex-all-configs",
        name="pi",
        namespace="default",
        image="perl:5.34.0",
        # Entrypoint of the container, if not specified the Docker container's
        # entrypoint is used. The cmds parameter is templated.
        cmds=["perl"],
        # Arguments to the entrypoint. The docker image's CMD is used if this
        # is not provided. The arguments parameter is templated.
        arguments=["-Mbignum=bpi", "-wle", "print bpi(2000)"],
        # The secrets to pass to the Pod; the Pod will fail to create if the
        # secrets you specify in a Secret object do not exist in Kubernetes.
        secrets=[],
        # Labels to apply to the Pod.
        labels={"pod-label": "label-name"},
        # Timeout to start up the Pod, default is 120.
        startup_timeout_seconds=120,
        # The environment variables to be initialized in the container.
        # env_vars are templated.
        env_vars={"EXAMPLE_VAR": "/example/value"},
        # If true, logs stdout output of container. Defaults to True.
        get_logs=True,
        # Determines when to pull a fresh image: 'IfNotPresent' will cause
        # the Kubelet to skip pulling an image if it already exists. If you
        # want to always pull a new image, set it to 'Always'.
        image_pull_policy="Always",
        # Annotations are non-identifying metadata you can attach to the Pod.
        # Can be a large range of data, and can include characters that are not
        # permitted by labels.
        annotations={"key1": "value1"},
        # Optional resource specifications for the Pod; this allows you to
        # set both cpu and memory limits and requirements.
        # Prior to Airflow 2.3 and the cncf providers package 5.0.0,
        # resources were passed as a dictionary. This change was made in
        # https://github.com/apache/airflow/pull/27197
        # Additionally, "memory" and "cpu" were previously named
        # "limit_memory" and "limit_cpu".
        # resources={'limit_memory': "250M", 'limit_cpu': "100m"},
        container_resources=k8s_models.V1ResourceRequirements(
            limits={"memory": "250M", "cpu": "100m"},
        ),
        # Specifies path to kubernetes config. If no config is specified, it
        # will default to '~/.kube/config'. The config_file is templated.
        config_file="/home/airflow/composer_kube_config",
        # If true, the content of /airflow/xcom/return.json from container will
        # also be pushed to an XCom when the container ends.
        do_xcom_push=False,
        # List of Volume objects to pass to the Pod.
        volumes=[],
        # List of VolumeMount objects to pass to the Pod.
        volume_mounts=[],
        # Affinity determines which nodes the Pod can run on based on the
        # config. For more information see:
        # https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
        # Pod affinity with the KubernetesPodOperator
        # is not supported with Composer 2;
        # instead, create a cluster and use the GKEStartPodOperator:
        # https://cloud.google.com/composer/docs/using-gke-operator
        affinity={},
    )
    # [END composer_kubernetespodoperator_fullconfig]
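    # The tasks in this sample run independently of one another. If ordering
    # were needed, dependencies could be declared with the usual bit-shift
    # syntax, for example:
    #
    #   kubernetes_min_pod >> kubernetes_full_pod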
    # [END composer_kubernetespodoperator]